Example #1
    def print_scaling_summary(self, scaling_script):
        """Log summary information after scaling."""
        if ScalingModelObserver().data:
            logger.info(ScalingModelObserver().return_model_error_summary())
        valid_ranges = get_valid_image_ranges(scaling_script.experiments)
        image_ranges = get_image_ranges(scaling_script.experiments)
        msg = []
        for (img, valid, refl) in zip(image_ranges, valid_ranges,
                                      scaling_script.reflections):
            if valid:
                if (len(valid) > 1 or valid[0][0] != img[0]
                        or valid[-1][1] != img[1]):
                    msg.append(
                        "Excluded images for experiment id: %s, image range: %s, limited range: %s"
                        % (
                            refl.experiment_identifiers().keys()[0],
                            list(img),
                            list(valid),
                        ))
        if msg:
            msg = ["Summary of image ranges removed:"] + msg
            logger.info("\n".join(msg))

        # report on partiality of dataset
        partials = flex.double()
        for r in scaling_script.reflections:
            if "partiality" in r:
                partials.extend(r["partiality"])
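        # Comparisons on a flex.double give flex.bool masks; these are
        # combined below to bin reflections by partiality for the table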
        not_full_sel = partials < 0.99
        not_zero_sel = partials > 0.01
        gt_half = partials > 0.5
        lt_half = partials < 0.5
        partial_gt_half_sel = not_full_sel & gt_half
        partial_lt_half_sel = not_zero_sel & lt_half
        logger.info("Summary of dataset partialities")
        header = ["Partiality (p)", "n_refl"]
        rows = [
            ["all reflections", str(partials.size())],
            ["p > 0.99", str(not_full_sel.count(False))],
            ["0.5 < p < 0.99",
             str(partial_gt_half_sel.count(True))],
            ["0.01 < p < 0.5",
             str(partial_lt_half_sel.count(True))],
            ["p < 0.01", str(not_zero_sel.count(False))],
        ]
        logger.info(tabulate(rows, header))
        logger.info(
            """
Reflections below a partiality_cutoff of %s are not considered for any
part of the scaling analysis or for the reporting of merging statistics.
Additionally, if applicable, only reflections with a min_partiality > %s
were used when refining the scaling model.
""",
            scaling_script.params.cut_data.partiality_cutoff,
            scaling_script.params.reflection_selection.min_partiality,
        )
        if MergingStatisticsObserver().data:
            logger.info(
                make_merging_statistics_summary(
                    MergingStatisticsObserver().data["statistics"]))
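
# A minimal, illustrative sketch (not part of the class above) of the flex
# selection pattern used in print_scaling_summary: comparisons on a
# flex.double yield flex.bool masks that combine with "&" and are tallied
# with count().
from dials.array_family import flex

demo_partials = flex.double([0.2, 0.7, 1.0])
demo_gt_half_sel = (demo_partials < 0.99) & (demo_partials > 0.5)
assert demo_gt_half_sel.count(True) == 1  # only 0.7 lies in (0.5, 0.99)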
Example #2
def test_get_image_ranges():
    """Test for namesake function"""
    scan = Scan(image_range=[1, 200], oscillation=[0.0, 1.0])
    exp1 = Experiment(scan=scan)
    exp2 = Experiment()
    experiments = [exp1, exp2]
    image_ranges = get_image_ranges(experiments)
    assert image_ranges == [(1, 200), (0, 0)]
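
# A hypothetical implementation consistent with this test (the real dials
# function may differ in detail): report each experiment's scan image range,
# falling back to (0, 0) for scanless experiments.
def get_image_ranges_sketch(experiments):
    return [
        expt.scan.get_image_range() if expt.scan else (0, 0)
        for expt in experiments
    ]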
Example #3
def export_mtz(integrated_data, experiment_list, params):
    """Export data from integrated_data corresponding to experiment_list to an
    MTZ file hklout."""

    # if mtz filename is auto, then choose scaled.mtz or integrated.mtz
    if params.mtz.hklout in (None, Auto, "auto"):
        if ("intensity.scale.value"
                in integrated_data) and ("intensity.scale.variance"
                                         in integrated_data):
            params.mtz.hklout = "scaled.mtz"
            logger.info(
                "Data appears to be scaled, setting mtz.hklout = 'scaled.mtz'")
        else:
            params.mtz.hklout = "integrated.mtz"
            logger.info(
                "Data appears to be unscaled, setting mtz.hklout = 'integrated.mtz'"
            )

    # First get the experiment identifier information out of the data
    expids_in_table = integrated_data.experiment_identifiers()
    if not list(expids_in_table.keys()):
        reflection_tables = parse_multiple_datasets([integrated_data])
        experiment_list, refl_list = assign_unique_identifiers(
            experiment_list, reflection_tables)
        integrated_data = flex.reflection_table()
        for reflections in refl_list:
            integrated_data.extend(reflections)
        expids_in_table = integrated_data.experiment_identifiers()
    integrated_data.assert_experiment_identifiers_are_consistent(
        experiment_list)
    expids_in_list = list(experiment_list.identifiers())

    # Convert experiment_list to a real python list or else identity assumptions
    # fail like:
    #   assert experiment_list[0] is experiment_list[0]
    # And assumptions about added attributes break
    experiment_list = list(experiment_list)

    # Validate multi-experiment assumptions
    if len(experiment_list) > 1:
        # All experiments should match crystals, or else we need multiple crystals/datasets
        if not all(x.crystal == experiment_list[0].crystal
                   for x in experiment_list[1:]):
            logger.warning(
                "Experiment crystals differ. Using first experiment crystal for file-level data."
            )

        wavelengths = match_wavelengths(experiment_list)
        if len(wavelengths.keys()) > 1:
            logger.info(
                "Multiple wavelengths found: \n%s",
                "\n".join("  Wavlength: %.5f, experiment numbers: %s " %
                          (k, ",".join(map(str, v)))
                          for k, v in wavelengths.items()),
            )
    else:
        wavelengths = OrderedDict(
            {experiment_list[0].beam.get_wavelength(): [0]})

    # also only work correctly with one panel (for the moment)
    if any(len(experiment.detector) != 1 for experiment in experiment_list):
        logger.warning("Ignoring multiple panels in output MTZ")

    best_unit_cell = params.mtz.best_unit_cell
    if best_unit_cell is None:
        best_unit_cell = determine_best_unit_cell(experiment_list)
    integrated_data["d"] = best_unit_cell.d(integrated_data["miller_index"])

    # Clean up the data with the passed in options
    integrated_data = filter_reflection_table(
        integrated_data,
        intensity_choice=params.intensity,
        partiality_threshold=params.mtz.partiality_threshold,
        combine_partials=params.mtz.combine_partials,
        min_isigi=params.mtz.min_isigi,
        filter_ice_rings=params.mtz.filter_ice_rings,
        d_min=params.mtz.d_min,
    )

    # get batch offsets and image ranges - even for scanless experiments
    batch_offsets = [
        expt.scan.get_batch_offset() for expt in experiment_list
        if expt.scan is not None
    ]
    unique_offsets = set(batch_offsets)
    if len(unique_offsets) <= 1:
        logger.debug("Calculating new batches")
        batch_offsets = calculate_batch_offsets(experiment_list)
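        # Batch numbers are image number + offset, so uniqueness is checked
        # on offset + first image number rather than on the raw offsets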
        batch_starts = [
            e.scan.get_image_range()[0] if e.scan else 0
            for e in experiment_list
        ]
        effective_offsets = [
            o + s for o, s in zip(batch_offsets, batch_starts)
        ]
        unique_offsets = set(effective_offsets)
    else:
        logger.debug("Keeping existing batches")
    image_ranges = get_image_ranges(experiment_list)
    if len(unique_offsets) != len(batch_offsets):
        raise ValueError("Duplicate batch offsets detected: %s" % ", ".join(
            str(item)
            for item, count in Counter(batch_offsets).items() if count > 1))

    # Create the mtz file
    mtz_writer = UnmergedMTZWriter(
        experiment_list[0].crystal.get_space_group())

    # FIXME TODO for more than one experiment into an MTZ file:
    #
    # - add an epoch (or recover an epoch) from the scan and add this as an extra
    #   column to the MTZ file for scaling, so we know that the two lattices were
    #   integrated at the same time
    # ✓ decide a sensible BATCH increment to apply to the BATCH value between
    #   experiments and add this

    for id_ in expids_in_table.keys():
        # Grab our subset of the data
        # Use the identifier string to find this experiment's position in the list
        loc = expids_in_list.index(expids_in_table[id_])
        experiment = experiment_list[loc]
        if len(list(wavelengths.keys())) > 1:
            for i, (wl, exps) in enumerate(wavelengths.items()):
                if loc in exps:
                    wavelength = wl
                    dataset_id = i + 1
                    break
        else:
            wavelength = list(wavelengths.keys())[0]
            dataset_id = 1
        reflections = integrated_data.select(integrated_data["id"] == id_)
        batch_offset = batch_offsets[loc]
        image_range = image_ranges[loc]
        reflections = assign_batches_to_reflections([reflections],
                                                    [batch_offset])[0]
        experiment.data = dict(reflections)

        s0n = matrix.col(experiment.beam.get_s0()).normalize().elems
        logger.debug("Beam vector: %.4f %.4f %.4f" % s0n)

        mtz_writer.add_batch_list(
            image_range,
            experiment,
            wavelength,
            dataset_id,
            batch_offset=batch_offset,
            force_static_model=params.mtz.force_static_model,
        )

        # Create the batch offset array. This gives us an experiment (id)-dependent
        # batch offset to calculate the correct batch from image number.
        experiment.data["batch_offset"] = flex.int(len(experiment.data["id"]),
                                                   batch_offset)

        # Calculate whether we have a ROT value for this experiment, and set the column
        _, _, z = experiment.data["xyzcal.px"].parts()
        if experiment.scan:
            rot = experiment.scan.get_angle_from_array_index(z)
            experiment.data["ROT"] = rot
        else:
            experiment.data["ROT"] = z

    mtz_writer.add_crystal(
        crystal_name=params.mtz.crystal_name,
        project_name=params.mtz.project_name,
        unit_cell=best_unit_cell,
    )
    # Note: add unit cell here as may have changed basis since creating mtz.
    # For multi-wave unmerged mtz, we add an empty dataset for each wavelength,
    # but only write the data into the final dataset (for unmerged the batches
    # link the unmerged data to the individual wavelengths).
    for wavelength in wavelengths:
        mtz_writer.add_empty_dataset(wavelength)

    # Combine all of the experiment data columns before writing
    combined_data = {
        k: v.deep_copy()
        for k, v in experiment_list[0].data.items()
    }
    for experiment in experiment_list[1:]:
        for k, v in experiment.data.items():
            combined_data[k].extend(v)
    # ALL columns must be the same length
    column_lengths = {len(v) for v in combined_data.values()}
    assert len(column_lengths) == 1, "Column length mismatch"
    assert len(combined_data["id"]) == len(
        integrated_data["id"]), "Lost rows in split/combine"

    # Write all the data and columns to the mtz file
    mtz_writer.write_columns(combined_data)

    logger.info("Saving {} integrated reflections to {}".format(
        len(combined_data["id"]), params.mtz.hklout))
    mtz_file = mtz_writer.mtz_file
    mtz_file.write(params.mtz.hklout)

    return mtz_file
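
# A standalone sketch of the duplicate-offset check in export_mtz (names
# here are illustrative): collections.Counter reports any offset that
# occurs more than once.
from collections import Counter

def duplicate_items(values):
    return [item for item, count in Counter(values).items() if count > 1]

assert duplicate_items([0, 100, 100, 300]) == [100]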
Example #4
def print_scaling_summary(script):
    """Log summary information after scaling."""
    logger.info(print_scaling_model_error_summary(script.experiments))
    valid_ranges = get_valid_image_ranges(script.experiments)
    image_ranges = get_image_ranges(script.experiments)
    msg = []
    for (img, valid, refl) in zip(image_ranges, valid_ranges,
                                  script.reflections):
        if valid:
            if (len(valid) > 1 or valid[0][0] != img[0]
                    or valid[-1][1] != img[1]):
                msg.append(
                    "Excluded images for experiment id: %s, image range: %s, limited range: %s"
                    % (
                        refl.experiment_identifiers().keys()[0],
                        list(img),
                        list(valid),
                    ))
    if msg:
        msg = ["Summary of image ranges removed:"] + msg
        logger.info("\n".join(msg))

    # report on partiality of dataset
    partials = flex.double()
    for r in script.reflections:
        if "partiality" in r:
            partials.extend(r["partiality"])
    not_full_sel = partials < 0.99
    not_zero_sel = partials > 0.01
    gt_half = partials > 0.5
    lt_half = partials < 0.5
    partial_gt_half_sel = not_full_sel & gt_half
    partial_lt_half_sel = not_zero_sel & lt_half
    logger.info("Summary of dataset partialities")
    header = ["Partiality (p)", "n_refl"]
    rows = [
        ["all reflections", str(partials.size())],
        ["p > 0.99", str(not_full_sel.count(False))],
        ["0.5 < p < 0.99",
         str(partial_gt_half_sel.count(True))],
        ["0.01 < p < 0.5",
         str(partial_lt_half_sel.count(True))],
        ["p < 0.01", str(not_zero_sel.count(False))],
    ]
    logger.info(tabulate(rows, header))
    logger.info(
        """
Reflections below a partiality_cutoff of %s are not considered for any
part of the scaling analysis or for the reporting of merging statistics.
Additionally, if applicable, only reflections with a min_partiality > %s
were used when refining the scaling model.
""",
        script.params.cut_data.partiality_cutoff,
        script.params.reflection_selection.min_partiality,
    )
    stats = script.merging_statistics_result
    if stats:
        anom_stats, cut_stats, cut_anom_stats = (None, None, None)
        if not script.scaled_miller_array.space_group().is_centric():
            anom_stats = script.anom_merging_statistics_result
        logger.info(make_merging_statistics_summary(stats))
        try:
            d_min = resolution_cc_half(stats, limit=0.3).d_min
        except RuntimeError as e:
            logger.debug(f"Resolution fit failed: {e}")
        else:
            max_current_res = stats.bins[-1].d_min
            if d_min and d_min - max_current_res > 0.005:
                logger.info(
                    "Resolution limit suggested from CC" + "\u00BD" +
                    " fit (limit CC" + "\u00BD" + "=0.3): %.2f",
                    d_min,
                )
                try:
                    cut_stats, cut_anom_stats = merging_stats_from_scaled_array(
                        script.scaled_miller_array.resolution_filter(
                            d_min=d_min),
                        script.params.output.merging.nbins,
                        script.params.output.use_internal_variance,
                    )
                except DialsMergingStatisticsError:
                    pass
                else:
                    if script.scaled_miller_array.space_group().is_centric():
                        cut_anom_stats = None
        logger.info(
            table_1_summary(stats, anom_stats, cut_stats, cut_anom_stats))
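
# A minimal sketch of the resolution-cutoff decision above (function name
# hypothetical): re-binned statistics are only computed when the fitted
# CC1/2 limit sits more than 0.005 above the current outer-shell d_min.
def suggests_resolution_cut(fitted_d_min, current_d_min, tol=0.005):
    return bool(fitted_d_min) and fitted_d_min - current_d_min > tol

assert suggests_resolution_cut(1.50, 1.40)
assert not suggests_resolution_cut(1.401, 1.40)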