Example No. 1
def merge_data_to_mtz(params, experiments, reflections):
    """Merge data (at each wavelength) and write to an mtz file object."""
    wavelengths = match_wavelengths(experiments)
    if len(wavelengths) > 1:
        logger.info(
            "Multiple wavelengths found: \n%s",
            "\n".join("  Wavlength: %.5f, experiment numbers: %s " %
                      (k, ",".join(map(str, v)))
                      for k, v in wavelengths.items()),
        )
        return make_MAD_merged_mtz_file(params, experiments, reflections,
                                        wavelengths)
    merged_data = merge_and_truncate(params, experiments, reflections)
    return make_merged_mtz_file(params, list(wavelengths)[0], *merged_data)
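
Both branches above depend on match_wavelengths returning an ordered mapping from wavelength to the list of indices of the experiments measured at that wavelength. As a rough sketch of that contract (not the DIALS implementation; the default tolerance and the beam.get_wavelength() accessor are assumptions here), the grouping behaves like this:

from collections import OrderedDict

def match_wavelengths_sketch(experiments, absolute_tolerance=1e-4):
    """Group experiment indices by wavelength within a tolerance (sketch only)."""
    groups = OrderedDict()
    for i, expt in enumerate(experiments):
        wl = expt.beam.get_wavelength()  # dxtbx Beam accessor
        for key in groups:
            if abs(key - wl) <= absolute_tolerance:
                groups[key].append(i)  # same wavelength group, within tolerance
                break
        else:
            groups[wl] = [i]  # first experiment seen at this wavelength
    return groups
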
Example No. 2
def _export_unmerged_mtz(params, experiments, reflection_table):
    """Export data to unmerged_mtz format (as single file or split by wavelength)."""
    from dials.command_line.export import MTZExporter
    from dials.command_line.export import phil_scope as export_phil_scope

    export_params = export_phil_scope.extract()

    export_params.intensity = ["scale"]
    export_params.mtz.partiality_threshold = params.cut_data.partiality_cutoff
    export_params.mtz.crystal_name = params.output.crystal_name
    if params.cut_data.d_min:
        export_params.mtz.d_min = params.cut_data.d_min
    if len(params.output.unmerged_mtz) > 1:
        from dxtbx.model import ExperimentList

        wavelengths = match_wavelengths(experiments)
        assert len(params.output.unmerged_mtz) == len(wavelengths)
        for filename, wavelength in zip(params.output.unmerged_mtz,
                                        wavelengths.keys()):
            export_params.mtz.hklout = filename
            logger.info("\nSaving output to an unmerged mtz file to %s.",
                        filename)
            exps = ExperimentList()
            ids = []
            for i, exp in enumerate(experiments):
                if i in wavelengths[wavelength]:
                    exps.append(exp)
                    ids.append(exp.identifier)
            exporter = MTZExporter(
                export_params,
                exps,
                [reflection_table.select_on_experiment_identifiers(ids)],
            )
            exporter.export()
    else:
        logger.info(
            "\nSaving output to unmerged mtz file %s.",
            params.output.unmerged_mtz[0],
        )
        export_params.mtz.hklout = params.output.unmerged_mtz[0]
        exporter = MTZExporter(export_params, experiments, [reflection_table])
        exporter.export()
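
The multi-file branch assumes params.output.unmerged_mtz lists one filename per wavelength group, in the same order as the keys returned by match_wavelengths. The per-group selection it performs inline could be factored into a small helper; this is a sketch with a hypothetical name, reusing only calls that appear in the example:

from dxtbx.model import ExperimentList

def split_by_wavelength(experiments, reflection_table, wavelengths):
    """Yield (wavelength, experiments, reflections) per wavelength group."""
    for wavelength, indices in wavelengths.items():
        exps = ExperimentList([experiments[i] for i in indices])
        ids = [exp.identifier for exp in exps]
        refls = reflection_table.select_on_experiment_identifiers(ids)
        yield wavelength, exps, refls
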
Example No. 3
def _export_multi_merged_mtz(params, experiments, reflection_table):
    from dxtbx.model import ExperimentList

    wavelengths = match_wavelengths(experiments)
    assert len(params.output.merged_mtz) == len(wavelengths)
    for filename, wavelength in zip(params.output.merged_mtz,
                                    wavelengths.keys()):
        exps = ExperimentList()
        ids = []
        for i, exp in enumerate(experiments):
            if i in wavelengths[wavelength]:
                exps.append(exp)
                ids.append(exp.identifier)
        refls = reflection_table.select_on_experiment_identifiers(ids)
        scaled_array = scaled_data_as_miller_array([refls], exps)
        merged = scaled_array.merge_equivalents(
            use_internal_variance=params.output.use_internal_variance).array()
        merged_anom = (scaled_array.as_anomalous_array().merge_equivalents(
            use_internal_variance=params.output.use_internal_variance).array())
        mtz_file = make_merged_mtz_file(merged, merged_anom)
        logger.info("\nSaving output to a merged mtz file to %s.\n", filename)
        mtz_file.write(filename)
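
The merge_equivalents(...).array() calls above are standard cctbx miller-array operations. A self-contained illustration (the unit cell, space group, and observations below are made up) of merging duplicate intensity measurements:

from cctbx import crystal, miller
from cctbx.array_family import flex

symm = crystal.symmetry(unit_cell=(40, 50, 60, 90, 90, 90),
                        space_group_symbol="P 2 2 2")
# Two observations of (1, 0, 0) plus a single observation of (0, 1, 0).
mset = miller.set(symm, flex.miller_index([(1, 0, 0), (1, 0, 0), (0, 1, 0)]),
                  anomalous_flag=False)
intensities = miller.array(
    mset,
    data=flex.double([10.0, 12.0, 5.0]),
    sigmas=flex.double([1.0, 1.2, 0.8]),
).set_observation_type_xray_intensity()
# Averages the duplicate (1, 0, 0) observations into one merged reflection.
merged = intensities.merge_equivalents(use_internal_variance=False).array()
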
Example No. 4
    def run(self):
        """Execute the script."""

        # Parse the command line
        params, _ = self.parser.parse_args(show_diff_phil=True)

        # Try to load the models and data
        if not params.input.experiments:
            print("No Experiments found in the input")
            self.parser.print_help()
            return
        if params.input.reflections:
            if len(params.input.reflections) != len(params.input.experiments):
                raise Sorry(
                    "The number of input reflections files does not match the "
                    "number of input experiments"
                )

        experiments = flatten_experiments(params.input.experiments)
        if params.input.reflections:
            reflections = flatten_reflections(params.input.reflections)[0]
        else:
            reflections = None

        experiments_template = functools.partial(
            params.output.template.format,
            prefix=params.output.experiments_prefix,
            maxindexlength=len(str(len(experiments))),
            extension="expt",
        )

        reflections_template = functools.partial(
            params.output.template.format,
            prefix=params.output.reflections_prefix,
            maxindexlength=len(str(len(experiments))),
            extension="refl",
        )

        if params.output.chunk_sizes:
            if sum(params.output.chunk_sizes) != len(experiments):
                raise Sorry(
                    "Sum of chunk sizes list (%s) not equal to number of experiments (%s)"
                    % (sum(params.output.chunk_sizes), len(experiments))
                )

        if params.by_wavelength:
            if reflections:
                if not reflections.experiment_identifiers():
                    raise Sorry(
                        "Unable to split by wavelength as no experiment "
                        "identifiers are set in the reflection table."
                    )
            if all(experiments.identifiers() == ""):
                raise Sorry(
                    "Unable to split by wavelength as no experiment "
                    "identifiers are set in the experiment list."
                )

            wavelengths = match_wavelengths(experiments)
            for i, wl in enumerate(sorted(wavelengths.keys())):
                expids = []
                new_exps = ExperimentList()
                exp_nos = wavelengths[wl]
                for j in exp_nos:
                    expids.append(experiments[j].identifier)  # string
                    new_exps.append(experiments[j])

                experiment_filename = experiments_template(index=i)
                print(
                    "Saving experiments with wavelength %s to %s"
                    % (wl, experiment_filename)
                )
                new_exps.as_json(experiment_filename)
                if reflections:
                    refls = reflections.select_on_experiment_identifiers(expids)
                    reflections_filename = reflections_template(index=i)
                    print(
                        "Saving reflections with wavelength %s to %s"
                        % (wl, reflections_filename)
                    )
                    refls.as_file(reflections_filename)

        elif params.by_detector:
            assert (
                not params.output.chunk_size
            ), "chunk_size + by_detector is not implemented"
            if reflections is None:
                split_data = {
                    detector: {"experiments": ExperimentList()}
                    for detector in experiments.detectors()
                }
            else:
                split_data = {
                    detector: {
                        "experiments": ExperimentList(),
                        "reflections": flex.reflection_table(),
                    }
                    for detector in experiments.detectors()
                }

            for i, experiment in enumerate(experiments):
                split_expt_id = experiments.detectors().index(experiment.detector)
                experiment_filename = experiments_template(index=split_expt_id)
                print("Adding experiment %d to %s" % (i, experiment_filename))
                split_data[experiment.detector]["experiments"].append(experiment)
                if reflections is not None:
                    reflections_filename = reflections_template(index=split_expt_id)
                    print(
                        "Adding reflections for experiment %d to %s"
                        % (i, reflections_filename)
                    )
                    if reflections.experiment_identifiers().keys():
                        # first find which id value corresponds to experiment in question
                        identifier = experiment.identifier
                        id_ = None
                        for k in reflections.experiment_identifiers().keys():
                            if reflections.experiment_identifiers()[k] == identifier:
                                id_ = k
                                break
                        if id_ is None:
                            raise Sorry(
                                "Unable to find id matching experiment identifier in reflection table."
                            )
                        ref_sel = reflections.select(reflections["id"] == id_)
                        # now reset ids and reset/update identifiers map
                        for k in ref_sel.experiment_identifiers().keys():
                            del ref_sel.experiment_identifiers()[k]
                        new_id = len(split_data[experiment.detector]["experiments"]) - 1
                        ref_sel["id"] = flex.int(len(ref_sel), new_id)
                        ref_sel.experiment_identifiers()[new_id] = identifier
                    else:
                        ref_sel = reflections.select(reflections["id"] == i)
                        ref_sel["id"] = flex.int(
                            len(ref_sel),
                            len(split_data[experiment.detector]["experiments"]) - 1,
                        )
                    split_data[experiment.detector]["reflections"].extend(ref_sel)

            for i, detector in enumerate(experiments.detectors()):
                experiment_filename = experiments_template(index=i)
                print("Saving experiment %d to %s" % (i, experiment_filename))
                split_data[detector]["experiments"].as_json(experiment_filename)

                if reflections is not None:
                    reflections_filename = reflections_template(index=i)
                    print(
                        "Saving reflections for experiment %d to %s"
                        % (i, reflections_filename)
                    )
                    split_data[detector]["reflections"].as_file(reflections_filename)
        elif params.output.chunk_size or params.output.chunk_sizes:

            def save_chunk(chunk_id, expts, refls):
                experiment_filename = experiments_template(index=chunk_id)
                print("Saving chunk %d to %s" % (chunk_id, experiment_filename))
                expts.as_json(experiment_filename)
                if refls is not None:
                    reflections_filename = reflections_template(index=chunk_id)
                    print(
                        "Saving reflections for chunk %d to %s"
                        % (chunk_id, reflections_filename)
                    )
                    refls.as_file(reflections_filename)

            chunk_counter = 0
            chunk_expts = ExperimentList()
            if reflections:
                chunk_refls = flex.reflection_table()
            else:
                chunk_refls = None
            for i, experiment in enumerate(experiments):
                chunk_expts.append(experiment)
                if reflections:
                    if reflections.experiment_identifiers().keys():
                        # first find which id value corresponds to experiment in question
                        identifier = experiment.identifier
                        id_ = None
                        for k in reflections.experiment_identifiers().keys():
                            if reflections.experiment_identifiers()[k] == identifier:
                                id_ = k
                                break
                        if id_ is None:
                            raise Sorry(
                                "Unable to find id matching experiment identifier in reflection table."
                            )
                        ref_sel = reflections.select(reflections["id"] == id_)
                        # now reset ids and reset/update identifiers map
                        for k in ref_sel.experiment_identifiers().keys():
                            del ref_sel.experiment_identifiers()[k]
                        new_id = len(chunk_expts) - 1
                        ref_sel["id"] = flex.int(len(ref_sel), new_id)
                        ref_sel.experiment_identifiers()[new_id] = identifier
                    else:
                        ref_sel = reflections.select(reflections["id"] == i)
                        ref_sel["id"] = flex.int(len(ref_sel), len(chunk_expts) - 1)
                    chunk_refls.extend(ref_sel)
                if params.output.chunk_sizes:
                    chunk_limit = params.output.chunk_sizes[chunk_counter]
                else:
                    chunk_limit = params.output.chunk_size
                if len(chunk_expts) == chunk_limit:
                    save_chunk(chunk_counter, chunk_expts, chunk_refls)
                    chunk_counter += 1
                    chunk_expts = ExperimentList()
                    if reflections:
                        chunk_refls = flex.reflection_table()
                    else:
                        chunk_refls = None
            if len(chunk_expts) > 0:
                save_chunk(chunk_counter, chunk_expts, chunk_refls)
        else:
            for i, experiment in enumerate(experiments):

                experiment_filename = experiments_template(index=i)
                print("Saving experiment %d to %s" % (i, experiment_filename))
                ExperimentList([experiment]).as_json(experiment_filename)

                if reflections is not None:
                    reflections_filename = reflections_template(index=i)
                    print(
                        "Saving reflections for experiment %d to %s"
                        % (i, reflections_filename)
                    )
                    ref_sel = reflections.select(reflections["id"] == i)
                    if ref_sel.experiment_identifiers().keys():
                        identifier = ref_sel.experiment_identifiers()[i]
                        for k in ref_sel.experiment_identifiers().keys():
                            del ref_sel.experiment_identifiers()[k]
                        ref_sel["id"] = flex.int(ref_sel.size(), 0)
                        ref_sel.experiment_identifiers()[0] = identifier
                    else:
                        ref_sel["id"] = flex.int(len(ref_sel), 0)
                    ref_sel.as_file(reflections_filename)

        return
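
The chunking branch above flushes a chunk whenever it reaches the current size limit, taking per-chunk limits from chunk_sizes when supplied and a uniform chunk_size otherwise. Stripped of the reflection-table bookkeeping, the rule reduces to the following plain-Python sketch (hypothetical function name, list inputs):

def chunk_experiments(experiments, chunk_size=None, chunk_sizes=None):
    """Split a sequence into consecutive chunks (sketch of the rule above)."""
    chunks, current, counter = [], [], 0
    for experiment in experiments:
        current.append(experiment)
        limit = chunk_sizes[counter] if chunk_sizes else chunk_size
        if len(current) == limit:
            chunks.append(current)
            current, counter = [], counter + 1
    if current:  # trailing partial chunk, saved just like the code above
        chunks.append(current)
    return chunks

assert chunk_experiments(list(range(7)), chunk_sizes=[3, 3, 1]) == [[0, 1, 2], [3, 4, 5], [6]]
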
Example No. 5
def merge_data_to_mtz(params, experiments, reflections):
    """Merge data (at each wavelength) and write to an mtz file object."""
    wavelengths = match_wavelengths(
        experiments,
        absolute_tolerance=params.wavelength_tolerance,
    )  # wavelengths is an ordered dict
    mtz_datasets = [
        MTZDataClass(wavelength=w, project_name=params.output.project_name)
        for w in wavelengths.keys()
    ]
    dataset_names = params.output.dataset_names
    crystal_names = params.output.crystal_names

    # Use best_unit_cell if set, otherwise determine it from the experiments.
    best_unit_cell = params.best_unit_cell
    if not best_unit_cell:
        best_unit_cell = determine_best_unit_cell(experiments)
    reflections[0]["d"] = best_unit_cell.d(reflections[0]["miller_index"])
    for expt in experiments:
        expt.crystal.unit_cell = best_unit_cell

    if len(wavelengths) > 1:
        logger.info(
            "Multiple wavelengths found: \n%s",
            "\n".join(
                "  Wavlength: %.5f, experiment numbers: %s "
                % (k, ",".join(map(str, v)))
                for k, v in wavelengths.items()
            ),
        )
        if not dataset_names or len(dataset_names) != len(wavelengths):
            logger.info(
                "Unequal number of dataset names and wavelengths, using default naming."
            )
            dataset_names = [None] * len(wavelengths)
        if not crystal_names or len(crystal_names) != len(wavelengths):
            logger.info(
                "Unequal number of crystal names and wavelengths, using default naming."
            )
            crystal_names = [None] * len(wavelengths)
        experiments_subsets = []
        reflections_subsets = []
        for dataset, dname, cname in zip(mtz_datasets, dataset_names, crystal_names):
            dataset.dataset_name = dname
            dataset.crystal_name = cname
        for exp_nos in wavelengths.values():
            expids = [experiments[i].identifier for i in exp_nos]
            experiments_subsets.append(
                ExperimentList([experiments[i] for i in exp_nos])
            )
            reflections_subsets.append(
                reflections[0].select_on_experiment_identifiers(expids)
            )
    else:
        mtz_datasets[0].dataset_name = dataset_names[0] if dataset_names else None
        mtz_datasets[0].crystal_name = crystal_names[0] if crystal_names else None
        experiments_subsets = [experiments]
        reflections_subsets = reflections

    # merge and truncate the data for each wavelength group
    for experimentlist, reflection_table, mtz_dataset in zip(
        experiments_subsets, reflections_subsets, mtz_datasets
    ):
        # First generate two merge_equivalents objects, collect merging stats
        merged, merged_anomalous, stats_summary = merge(
            experimentlist,
            reflection_table,
            d_min=params.d_min,
            d_max=params.d_max,
            combine_partials=params.combine_partials,
            partiality_threshold=params.partiality_threshold,
            best_unit_cell=best_unit_cell,
            anomalous=params.anomalous,
            assess_space_group=params.assess_space_group,
            n_bins=params.merging.n_bins,
            use_internal_variance=params.merging.use_internal_variance,
        )

        merged_array = merged.array()
        # Save the relevant data in the mtz_dataset dataclass
        # This will add the data for IMEAN/SIGIMEAN
        mtz_dataset.merged_array = merged_array
        if merged_anomalous:
            merged_anomalous_array = merged_anomalous.array()
            # This will add the data for I(+), I(-), SIGI(+), SIGI(-), N(+), N(-)
            mtz_dataset.merged_anomalous_array = merged_anomalous_array
            mtz_dataset.multiplicities = merged_anomalous.redundancies()
        else:
            merged_anomalous_array = None
            # This will add the data for N
            mtz_dataset.multiplicities = merged.redundancies()

        if params.anomalous:
            merged_intensities = merged_anomalous_array
        else:
            merged_intensities = merged_array

        anom_amplitudes = None
        if params.truncate:
            amplitudes, anom_amplitudes, dano = truncate(merged_intensities)
            # This will add the data for F, SIGF
            mtz_dataset.amplitudes = amplitudes
            # This will add the data for F(+), F(-), SIGF(+), SIGF(-)
            mtz_dataset.anomalous_amplitudes = anom_amplitudes
            # This will add the data for DANO, SIGDANO
            mtz_dataset.dano = dano

        # print out analysis statistics
        show_wilson_scaling_analysis(merged_intensities)
        if stats_summary:
            logger.info(stats_summary)
        if anom_amplitudes:
            logger.info(make_dano_table(anom_amplitudes))

    # pass the dataclasses to an MTZ writer to generate the mtz file and return.
    return make_merged_mtz_file(mtz_datasets)
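
Example No. 5 fills one MTZDataClass per wavelength group before handing the list to make_merged_mtz_file. Judging only from the attributes assigned above, the dataclass plausibly looks something like the following reconstruction (not the actual DIALS definition; field types are guesses):

from dataclasses import dataclass
from typing import Any, Optional

@dataclass
class MTZDataClass:
    """Fields inferred from the assignments in the example (sketch only)."""
    wavelength: float = 0.0
    project_name: Optional[str] = None
    dataset_name: Optional[str] = None
    crystal_name: Optional[str] = None
    merged_array: Any = None            # IMEAN, SIGIMEAN
    merged_anomalous_array: Any = None  # I(+), I(-), SIGI(+), SIGI(-)
    multiplicities: Any = None          # N or N(+), N(-)
    amplitudes: Any = None              # F, SIGF
    anomalous_amplitudes: Any = None    # F(+), F(-), SIGF(+), SIGF(-)
    dano: Any = None                    # DANO, SIGDANO
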
Example No. 6
def merge_data_to_mtz(params, experiments, reflections):
    """Merge data (at each wavelength) and write to an mtz file object."""
    wavelengths = match_wavelengths(
        experiments)  # wavelengths is an ordered dict
    mtz_datasets = [
        MTZDataClass(wavelength=w, project_name=params.output.project_name)
        for w in wavelengths.keys()
    ]
    dataset_names = params.output.dataset_names
    crystal_names = params.output.crystal_names
    if len(wavelengths) > 1:
        logger.info(
            "Multiple wavelengths found: \n%s",
            "\n".join("  Wavlength: %.5f, experiment numbers: %s " %
                      (k, ",".join(map(str, v)))
                      for k, v in wavelengths.items()),
        )
        if not dataset_names or len(dataset_names) != len(wavelengths):
            logger.info(
                "Unequal number of dataset names and wavelengths, using default naming."
            )
            dataset_names = [None] * len(wavelengths)
        if not crystal_names or len(crystal_names) != len(wavelengths):
            logger.info(
                "Unequal number of crystal names and wavelengths, using default naming."
            )
            crystal_names = [None] * len(wavelengths)
        experiments_subsets = []
        reflections_subsets = []
        for dataset, dname, cname in zip(mtz_datasets, dataset_names,
                                         crystal_names):
            dataset.dataset_name = dname
            dataset.crystal_name = cname
        for exp_nos in wavelengths.values():
            expids = [experiments[i].identifier for i in exp_nos]
            experiments_subsets.append(
                ExperimentList([experiments[i] for i in exp_nos]))
            reflections_subsets.append(
                reflections[0].select_on_experiment_identifiers(expids))
    else:
        mtz_datasets[0].dataset_name = dataset_names[0] if dataset_names else None
        mtz_datasets[0].crystal_name = crystal_names[0] if crystal_names else None
        experiments_subsets = [experiments]
        reflections_subsets = reflections

    for experimentlist, reflection_table, mtz_dataset in zip(
            experiments_subsets, reflections_subsets, mtz_datasets):
        # merge and truncate the data
        merged_array, merged_anomalous_array, stats_summary = merge(
            experimentlist,
            reflection_table,
            d_min=params.d_min,
            d_max=params.d_max,
            combine_partials=params.combine_partials,
            partiality_threshold=params.partiality_threshold,
            anomalous=params.anomalous,
            assess_space_group=params.assess_space_group,
            n_bins=params.merging.n_bins,
            use_internal_variance=params.merging.use_internal_variance,
        )
        mtz_dataset.merged_array = merged_array
        mtz_dataset.merged_anomalous_array = merged_anomalous_array
        if params.anomalous:
            merged_intensities = merged_anomalous_array
        else:
            merged_intensities = merged_array

        if params.truncate:
            amplitudes, anomalous_amplitudes = truncate(merged_intensities)
            mtz_dataset.amplitudes = amplitudes
            mtz_dataset.anomalous_amplitudes = anomalous_amplitudes
        show_wilson_scaling_analysis(merged_intensities)
        if stats_summary:
            logger.info(stats_summary)

    return make_merged_mtz_file(mtz_datasets)
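
For context, a top-level driver for either variant of merge_data_to_mtz could look roughly like this; the filenames are placeholders, and the module path for the imports is an assumption (in the examples above these names are already in scope):

from dials.command_line.merge import merge_data_to_mtz, phil_scope  # assumed path
from dials.array_family import flex
from dxtbx.model import ExperimentList

experiments = ExperimentList.from_file("scaled.expt")
reflections = [flex.reflection_table.from_file("scaled.refl")]

params = phil_scope.extract()
mtz_object = merge_data_to_mtz(params, experiments, reflections)
mtz_object.write("merged.mtz")  # iotbx mtz objects support write()
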