Example #1
    def run(self, experiments, reflections):
        self.logger.log("Rebalancing input load -- %s method..." %
                        self.params.input.parallel_file_load.balance)
        if self.mpi_helper.rank == 0:
            self.logger.main_log("Rebalancing input load -- %s method..." %
                                 self.params.input.parallel_file_load.balance)

        if self.params.input.parallel_file_load.balance == "global":
            new_experiments, new_reflections = self.distribute_over_ranks(
                experiments, reflections, self.mpi_helper.comm,
                self.mpi_helper.size)
        elif self.params.input.parallel_file_load.balance == "per_node":
            mpi_color = int(
                self.mpi_helper.rank /
                self.params.input.parallel_file_load.ranks_per_node)
            mpi_new_rank = self.mpi_helper.rank % self.params.input.parallel_file_load.ranks_per_node
            mpi_split_comm = self.mpi_helper.comm.Split(
                mpi_color, mpi_new_rank)
            new_experiments, new_reflections = self.distribute_over_ranks(
                experiments, reflections, mpi_split_comm,
                self.params.input.parallel_file_load.ranks_per_node)

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(new_experiments, new_reflections)

        return new_experiments, new_reflections
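The per_node branch above splits the world communicator into one sub-communicator per compute node and rebalances within each node. A minimal standalone sketch of that split, assuming mpi4py and a hypothetical ranks_per_node of 4 (the worker reads it from params.input.parallel_file_load.ranks_per_node):

from mpi4py import MPI

comm = MPI.COMM_WORLD
ranks_per_node = 4                               # assumption: MPI ranks per physical node
mpi_color = comm.Get_rank() // ranks_per_node    # one color per node
mpi_new_rank = comm.Get_rank() % ranks_per_node  # rank order within the node
node_comm = comm.Split(mpi_color, mpi_new_rank)  # sub-communicator spanning one node
print("global rank %d -> node-local rank %d of %d" %
      (comm.Get_rank(), node_comm.Get_rank(), node_comm.Get_size()))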
Example #2
    def run(self, experiments, reflections):
        if 'significance_filter' in self.params.select.algorithm:
            experiments, reflections = self.apply_significance_filter(
                experiments, reflections)

        # Do we have any data left?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(experiments, reflections)

        return experiments, reflections
Example #3
    def run(self, experiments, reflections):
        self.logger.log("Rebalancing input load -- %s method..." %
                        self.params.input.parallel_file_load.balance)
        if self.mpi_helper.rank == 0:
            self.logger.main_log("Rebalancing input load -- %s method..." %
                                 self.params.input.parallel_file_load.balance)

        if self.params.input.parallel_file_load.balance == "global":
            new_experiments, new_reflections = self.distribute_over_ranks(
                experiments, reflections, self.mpi_helper.comm,
                self.mpi_helper.size)
        elif self.params.input.parallel_file_load.balance == "per_node":
            mpi_color = int(
                self.mpi_helper.rank /
                self.params.input.parallel_file_load.ranks_per_node)
            mpi_new_rank = self.mpi_helper.rank % self.params.input.parallel_file_load.ranks_per_node
            mpi_split_comm = self.mpi_helper.comm.Split(
                mpi_color, mpi_new_rank)
            new_experiments, new_reflections = self.distribute_over_ranks(
                experiments, reflections, mpi_split_comm,
                self.params.input.parallel_file_load.ranks_per_node)

        if self.params.input.parallel_file_load.reset_experiment_id_column:
            self.logger.log('Starting id column reset')
            id_map = new_reflections.experiment_identifiers()
            reverse_map = {}
            for expt_id, experiment in enumerate(new_experiments):
                id_map[expt_id] = experiment.identifier
                reverse_map[experiment.identifier] = expt_id
            id_col = new_reflections['id']
            ident_col = new_reflections['exp_id']
            for i in range(len(new_reflections)):
                id_col[i] = reverse_map[ident_col[i]]
            self.logger.log('Column reset done')

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(new_experiments, new_reflections)

        return new_experiments, new_reflections
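The reset_experiment_id_column block above rebuilds the integer 'id' column from each reflection's string experiment identifier after rebalancing. A pure-Python sketch of the same reverse-mapping idea, with made-up identifiers instead of DIALS tables:

identifiers = ["expt-aaa", "expt-bbb", "expt-ccc"]             # order of the rebalanced experiments
exp_id_col = ["expt-bbb", "expt-aaa", "expt-ccc", "expt-aaa"]  # per-reflection 'exp_id' values
reverse_map = {ident: i for i, ident in enumerate(identifiers)}
id_col = [reverse_map[ident] for ident in exp_id_col]          # new integer 'id' column
print(id_col)  # [1, 0, 2, 0]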
Example #4
    def run(self, experiments, reflections):
        self.logger.log_step_time("SCALE_FRAMES")
        if self.params.scaling.algorithm != "mark0":  # mark1 implies no scaling/post-refinement
            self.logger.log("No scaling was done")
            if self.mpi_helper.rank == 0:
                self.logger.main_log("No scaling was done")
            return experiments, reflections

        new_experiments = ExperimentList()
        new_reflections = flex.reflection_table()

        # scale experiments, one at a time. Reject experiments that do not correlate with the reference or fail to scale.
        results = []
        slopes = []
        correlations = []
        high_res_experiments = 0
        experiments_rejected_because_of_low_signal = 0
        experiments_rejected_because_of_low_correlation_with_reference = 0

        target_symm = symmetry(
            unit_cell=self.params.scaling.unit_cell,
            space_group_info=self.params.scaling.space_group)
        for experiment in experiments:
            exp_reflections = reflections.select(
                reflections['exp_id'] == experiment.identifier)

            # Build a miller array for the experiment reflections
            exp_miller_indices = miller.set(
                target_symm, exp_reflections['miller_index_asymmetric'], True)
            exp_intensities = miller.array(
                exp_miller_indices, exp_reflections['intensity.sum.value'],
                flex.double(
                    flex.sqrt(exp_reflections['intensity.sum.variance'])))

            model_intensities = self.params.scaling.i_model

            # Extract an array of HKLs from the model to match the experiment HKLs
            matching_indices = miller.match_multi_indices(
                miller_indices_unique=model_intensities.indices(),
                miller_indices=exp_intensities.indices())

            # Least squares
            result = self.fit_experiment_to_reference(model_intensities,
                                                      exp_intensities,
                                                      matching_indices)

            if result.error == scaling_result.err_low_signal:
                experiments_rejected_because_of_low_signal += 1
                continue
            elif result.error == scaling_result.err_low_correlation:
                experiments_rejected_because_of_low_correlation_with_reference += 1
                continue

            slopes.append(result.slope)
            correlations.append(result.correlation)

            if self.params.output.log_level == 0:
                self.logger.log(
                    "Experiment ID: %s; Slope: %f; Correlation %f" %
                    (experiment.identifier, result.slope, result.correlation))

            # count high resolution experiments
            if exp_intensities.d_min() <= self.params.merging.d_min:
                high_res_experiments += 1

            # apply scale factors
            if not self.params.postrefinement.enable:
                exp_reflections['intensity.sum.value'] *= result.slope
                exp_reflections['intensity.sum.variance'] *= (result.slope**2)

            new_experiments.append(experiment)
            new_reflections.extend(exp_reflections)

        rejected_experiments = len(experiments) - len(new_experiments)
        assert rejected_experiments == experiments_rejected_because_of_low_signal + \
                                        experiments_rejected_because_of_low_correlation_with_reference

        reflections_removed_because_of_rejected_experiments = \
            reflections.size() - new_reflections.size()

        self.logger.log("Experiments rejected because of low signal: %d" %
                        experiments_rejected_because_of_low_signal)
        self.logger.log(
            "Experiments rejected because of low correlation with reference: %d"
            % experiments_rejected_because_of_low_correlation_with_reference)
        self.logger.log(
            "Reflections rejected because of rejected experiments: %d" %
            reflections_removed_because_of_rejected_experiments)
        self.logger.log("High resolution experiments: %d" %
                        high_res_experiments)
        if self.params.postrefinement.enable:
            self.logger.log(
                "Note: scale factors were not applied, because postrefinement is enabled"
            )

        # MPI-reduce all counts
        comm = self.mpi_helper.comm
        MPI = self.mpi_helper.MPI
        total_experiments_rejected_because_of_low_signal = comm.reduce(
            experiments_rejected_because_of_low_signal, MPI.SUM, 0)
        total_experiments_rejected_because_of_low_correlation_with_reference = comm.reduce(
            experiments_rejected_because_of_low_correlation_with_reference,
            MPI.SUM, 0)
        total_reflections_removed_because_of_rejected_experiments = comm.reduce(
            reflections_removed_because_of_rejected_experiments, MPI.SUM, 0)
        total_high_res_experiments = comm.reduce(high_res_experiments, MPI.SUM,
                                                 0)
        all_slopes = comm.reduce(slopes, MPI.SUM, 0)
        all_correlations = comm.reduce(correlations, MPI.SUM, 0)

        # rank 0: log data statistics
        if self.mpi_helper.rank == 0:
            self.logger.main_log(
                'Experiments rejected because of low signal: %d' %
                total_experiments_rejected_because_of_low_signal)
            self.logger.main_log(
                'Experiments rejected because of low correlation with reference: %d'
                %
                total_experiments_rejected_because_of_low_correlation_with_reference
            )
            self.logger.main_log(
                'Reflections rejected because of rejected experiments: %d' %
                total_reflections_removed_because_of_rejected_experiments)
            self.logger.main_log(
                'Experiments with high resolution of %5.2f Angstrom or better: %d'
                % (self.params.merging.d_min, total_high_res_experiments))

            if len(all_slopes) > 0:
                stats_slope = flex.mean_and_variance(flex.double(all_slopes))
                self.logger.main_log(
                    'Average experiment scale factor wrt reference: %f' %
                    (stats_slope.mean()))
            if len(all_correlations) > 0:
                stats_correlation = flex.mean_and_variance(
                    flex.double(all_correlations))
                self.logger.main_log(
                    'Average experiment correlation with reference: %f +/- %f'
                    %
                    (stats_correlation.mean(),
                     stats_correlation.unweighted_sample_standard_deviation()))

            if self.params.postrefinement.enable:
                self.logger.main_log(
                    "Note: scale factors were not applied, because postrefinement is enabled"
                )

        self.logger.log_step_time("SCALE_FRAMES", True)

        # Do we have any data left?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(new_experiments, new_reflections)

        return new_experiments, new_reflections
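The MPI-reduce step above relies on mpi4py's object interface: with op=MPI.SUM, integer counters are summed on the root rank and Python lists are concatenated, which is how the per-rank slopes and correlations end up as single lists on rank 0. A minimal sketch of that pattern with stand-in values:

from mpi4py import MPI

comm = MPI.COMM_WORLD
rejected_here = comm.Get_rank() % 2          # stand-in for a per-rank counter
slopes_here = [1.0 + 0.1 * comm.Get_rank()]  # stand-in for per-rank scale factors

total_rejected = comm.reduce(rejected_here, op=MPI.SUM, root=0)  # integers are summed
all_slopes = comm.reduce(slopes_here, op=MPI.SUM, root=0)        # lists are concatenated

if comm.Get_rank() == 0:
    print("total rejected:", total_rejected)
    print("slopes from all ranks:", all_slopes)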
Example #5
    def run(self, all_experiments, all_reflections):
        """ Load all the data using MPI """
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex

        # Both must be none or not none
        test = [all_experiments is None, all_reflections is None].count(True)
        assert test in [0, 2]
        if test == 2:
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0
        else:
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        self.logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % (starting_expts_count, starting_refls_count))

        # Generate and send a list of file paths to each worker
        if self.mpi_helper.rank == 0:
            file_list = self.get_list()
            self.logger.log(
                "Built an input list of %d json/pickle file pairs" %
                (len(file_list)))
            self.params.input.path = None  # Rank 0 has already parsed the input parameters
            per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
                                    calculate_file_load(available_rank_count = self.mpi_helper.size)
            self.logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                (len(per_rank_file_list)))
            transmitted = per_rank_file_list
        else:
            transmitted = None

        self.logger.log_step_time("BROADCAST_FILE_LIST")
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        new_file_list = transmitted[self.mpi_helper.rank] \
            if self.mpi_helper.rank < len(transmitted) else None
        self.logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Load the data
        self.logger.log_step_time("LOAD")
        if new_file_list is not None:
            self.logger.log("Received a list of %d json/pickle file pairs" %
                            len(new_file_list))
            for experiments_filename, reflections_filename in new_file_list:
                self.logger.log("Reading %s %s" %
                                (experiments_filename, reflections_filename))
                experiments = ExperimentListFactory.from_json_file(
                    experiments_filename, check_format=False)
                reflections = flex.reflection_table.from_file(
                    reflections_filename)
                self.logger.log("Data read, prepping")

                if 'intensity.sum.value' in reflections:
                    reflections['intensity.sum.value.unmodified'] = \
                        reflections['intensity.sum.value'] * 1
                if 'intensity.sum.variance' in reflections:
                    reflections['intensity.sum.variance.unmodified'] = \
                        reflections['intensity.sum.variance'] * 1

                new_ids = flex.int(len(reflections), -1)
                new_identifiers = flex.std_string(len(reflections))
                eid = reflections.experiment_identifiers()
                for k in eid.keys():
                    del eid[k]
                for experiment_id, experiment in enumerate(experiments):
                    # select reflections of the current experiment
                    refls_sel = reflections['id'] == experiment_id

                    if refls_sel.count(True) == 0: continue

                    if experiment.identifier is None or len(
                            experiment.identifier) == 0:
                        experiment.identifier = create_experiment_identifier(
                            experiment, experiments_filename, experiment_id)

                    if not self.params.input.keep_imagesets:
                        experiment.imageset = None
                    all_experiments.append(experiment)

                    # Reflection experiment 'id' is unique within this rank; 'exp_id' (i.e. experiment identifier) is unique globally
                    new_identifiers.set_selected(refls_sel,
                                                 experiment.identifier)

                    new_id = len(all_experiments) - 1
                    eid[new_id] = experiment.identifier
                    new_ids.set_selected(refls_sel, new_id)
                assert (new_ids < 0).count(True) == 0, \
                    "Not all reflections accounted for"
                reflections['id'] = new_ids
                reflections['exp_id'] = new_identifiers
                all_reflections.extend(reflections)
        else:
            self.logger.log("Received a list of 0 json/pickle file pairs")
        self.logger.log_step_time("LOAD", True)

        self.logger.log('Read %d experiments consisting of %d reflections' %
                        (len(all_experiments) - starting_expts_count,
                         len(all_reflections) - starting_refls_count))
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        all_reflections = self.prune_reflection_table_keys(all_reflections)

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(all_experiments, all_reflections)
        return all_experiments, all_reflections
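In the loader above, rank 0 partitions the input file list into one sub-list per rank and broadcasts the whole structure; each rank then indexes out its own assignment. A minimal sketch of that distribution pattern, assuming mpi4py and invented file names (the worker's split is computed by file_load_calculator rather than round-robin):

from mpi4py import MPI

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

if rank == 0:
    file_pairs = [("expts_%03d.json" % i, "refls_%03d.pickle" % i) for i in range(10)]
    per_rank_file_list = [file_pairs[i::size] for i in range(size)]  # simple round-robin split
else:
    per_rank_file_list = None

per_rank_file_list = comm.bcast(per_rank_file_list, root=0)
my_files = per_rank_file_list[rank] if rank < len(per_rank_file_list) else []
print("rank %d will load %d json/pickle file pairs" % (rank, len(my_files)))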
Example #6
    def run(self, experiments, reflections):
        self.logger.log_step_time("POSTREFINEMENT")
        if (not self.params.postrefinement.enable) or (
                self.params.scaling.algorithm !=
                "mark0"):  # mark1 implies no scaling/post-refinement
            self.logger.log("No post-refinement was done")
            if self.mpi_helper.rank == 0:
                self.logger.main_log("No post-refinement was done")
            return experiments, reflections

        target_symm = symmetry(
            unit_cell=self.params.scaling.unit_cell,
            space_group_info=self.params.scaling.space_group)
        i_model = self.params.scaling.i_model
        miller_set = self.params.scaling.miller_set

        # Ensure that match_multi_indices() will return identical results
        # when a frame's observations are matched against the
        # pre-generated Miller set, miller_set, and the reference
        # data set, i_model.  The implication is that the same match
        # can be used to map Miller indices to array indices for intensity
        # accumulation, and for determination of the correlation
        # coefficient in the presence of a scaling reference.
        assert len(i_model.indices()) == len(miller_set.indices())
        assert (i_model.indices() == miller_set.indices()).count(False) == 0

        new_experiments = ExperimentList()
        new_reflections = flex.reflection_table()

        experiments_rejected_by_reason = {}  # reason:how_many_rejected

        for experiment in experiments:

            exp_reflections = reflections.select(
                reflections['exp_id'] == experiment.identifier)

            # Build a miller array with _original_ miller indices of the experiment reflections
            exp_miller_indices_original = miller.set(
                target_symm, exp_reflections['miller_index'],
                not self.params.merging.merge_anomalous)
            observations_original_index = miller.array(
                exp_miller_indices_original,
                exp_reflections['intensity.sum.value'],
                flex.sqrt(exp_reflections['intensity.sum.variance']))

            assert exp_reflections.size() == exp_miller_indices_original.size()
            assert observations_original_index.size() == \
                exp_miller_indices_original.size()

            # Build a miller array with _asymmetric_ miller indices of the experiment reflections
            exp_miller_indices_asu = miller.set(
                target_symm, exp_reflections['miller_index_asymmetric'], True)
            observations = miller.array(
                exp_miller_indices_asu, exp_reflections['intensity.sum.value'],
                flex.sqrt(exp_reflections['intensity.sum.variance']))

            matches = miller.match_multi_indices(
                miller_indices_unique=miller_set.indices(),
                miller_indices=observations.indices())

            pair1 = flex.int([pair[1] for pair in matches.pairs()])  # refers to the observations
            pair0 = flex.int([pair[0] for pair in matches.pairs()])  # refers to the model

            # narrow things down to the set that matches, only
            observations_pair1_selected = observations.customized_copy(
                indices=flex.miller_index(
                    [observations.indices()[p] for p in pair1]),
                data=flex.double([observations.data()[p] for p in pair1]),
                sigmas=flex.double([observations.sigmas()[p] for p in pair1]))

            observations_original_index_pair1_selected = observations_original_index.customized_copy(
                indices=flex.miller_index(
                    [observations_original_index.indices()[p] for p in pair1]),
                data=flex.double(
                    [observations_original_index.data()[p] for p in pair1]),
                sigmas=flex.double(
                    [observations_original_index.sigmas()[p] for p in pair1]))
            I_observed = observations_pair1_selected.data()
            MILLER = observations_original_index_pair1_selected.indices()

            ORI = crystal_orientation(experiment.crystal.get_A(),
                                      basis_type.reciprocal)
            Astar = matrix.sqr(ORI.reciprocal_matrix())
            Astar_from_experiment = matrix.sqr(experiment.crystal.get_A())
            assert Astar == Astar_from_experiment

            WAVE = experiment.beam.get_wavelength()
            BEAM = matrix.col((0.0, 0.0, -1. / WAVE))
            BFACTOR = 0.
            MOSAICITY_DEG = experiment.crystal.get_half_mosaicity_deg()
            DOMAIN_SIZE_A = experiment.crystal.get_domain_size_ang()

            # calculation of correlation here
            I_reference = flex.double(
                [i_model.data()[pair[0]] for pair in matches.pairs()])
            I_invalid = flex.bool(
                [i_model.sigmas()[pair[0]] < 0. for pair in matches.pairs()])
            use_weights = False  # New facility for getting variance-weighted correlation

            if use_weights:
                # variance weighting
                I_weight = flex.double([
                    1. / (observations_pair1_selected.sigmas()[pair[1]])**2
                    for pair in matches.pairs()
                ])
            else:
                I_weight = flex.double(
                    len(observations_pair1_selected.sigmas()), 1.)

            I_weight.set_selected(I_invalid, 0.)
            """Explanation of 'include_negatives' semantics as originally implemented in cxi.merge postrefinement:
         include_negatives = True
         + and - reflections both used for Rh distribution for initial estimate of RS parameter
         + and - reflections both used for calc/obs correlation slope for initial estimate of G parameter
         + and - reflections both passed to the refinery and used in the target function (makes sense if
                             you look at it from a certain point of view)

         include_negatives = False
         + and - reflections both used for Rh distribution for initial estimate of RS parameter
         +       reflections only used for calc/obs correlation slope for initial estimate of G parameter
         + and - reflections both passed to the refinery and used in the target function (makes sense if
                             you look at it from a certain point of view)

         NOTE: by the new design, "include negatives" is always True
      """

            SWC = simple_weighted_correlation(I_weight, I_reference,
                                              I_observed)
            if self.params.output.log_level == 0:
                self.logger.log("Old correlation is: %f" % SWC.corr)

            if self.params.postrefinement.algorithm == "rs":

                Rhall = flex.double()

                for mill in MILLER:
                    H = matrix.col(mill)
                    Xhkl = Astar * H
                    Rh = (Xhkl + BEAM).length() - (1. / WAVE)
                    Rhall.append(Rh)

                Rs = math.sqrt(flex.mean(Rhall * Rhall))

                RS = 1. / 10000.  # reciprocal effective domain size of 1 micron
                RS = Rs  # try this empirically determined approximate, monochrome, a-mosaic value
                current = flex.double([SWC.slope, BFACTOR, RS, 0., 0.])

                parameterization_class = rs_parameterization
                refinery = rs_refinery(ORI=ORI,
                                       MILLER=MILLER,
                                       BEAM=BEAM,
                                       WAVE=WAVE,
                                       ICALCVEC=I_reference,
                                       IOBSVEC=I_observed)

            elif self.params.postrefinement.algorithm == "eta_deff":

                eta_init = 2. * MOSAICITY_DEG * math.pi / 180.
                D_eff_init = 2. * DOMAIN_SIZE_A
                current = flex.double(
                    [SWC.slope, BFACTOR, eta_init, 0., 0., D_eff_init])

                parameterization_class = eta_deff_parameterization
                refinery = eta_deff_refinery(ORI=ORI,
                                             MILLER=MILLER,
                                             BEAM=BEAM,
                                             WAVE=WAVE,
                                             ICALCVEC=I_reference,
                                             IOBSVEC=I_observed)

            func = refinery.fvec_callable(parameterization_class(current))
            functional = flex.sum(func * func)

            if self.params.output.log_level == 0:
                self.logger.log("functional: %f" % functional)

            self.current = current
            self.parameterization_class = parameterization_class
            self.refinery = refinery

            self.observations_pair1_selected = observations_pair1_selected
            self.observations_original_index_pair1_selected = observations_original_index_pair1_selected

            error_detected = False

            try:
                self.run_plain()

                result_observations_original_index, result_observations, result_matches = \
                    self.result_for_cxi_merge()

                assert result_observations_original_index.size() == \
                    result_observations.size()
                assert result_matches.pairs().size() == \
                    result_observations_original_index.size()
            except (AssertionError, ValueError, RuntimeError) as e:
                error_detected = True
                reason = repr(e)
                if not reason:
                    reason = "Unknown error"
                if reason not in experiments_rejected_by_reason:
                    experiments_rejected_by_reason[reason] = 1
                else:
                    experiments_rejected_by_reason[reason] += 1

            if not error_detected:
                new_experiments.append(experiment)

                new_exp_reflections = flex.reflection_table()
                new_exp_reflections['miller_index_asymmetric'] = result_observations.indices()
                new_exp_reflections['intensity.sum.value'] = result_observations.data()
                new_exp_reflections['intensity.sum.variance'] = flex.pow(
                    result_observations.sigmas(), 2)
                new_exp_reflections['exp_id'] = flex.std_string(
                    len(new_exp_reflections), experiment.identifier)

                # The original reflection table, i.e. the input to this run() method, has more columns than those used
                # for the postrefinement ("data" and "sigma" in the miller arrays). The problem is that some of the
                # input reflections may have been rejected by now. So to bring those extra columns over to the new
                # reflection table, we have to create a subset of the original exp_reflections table, which matches
                # (by original miller indices) the miller array results of the postrefinement.
                match_original_indices = miller.match_multi_indices(
                    miller_indices_unique=exp_miller_indices_original.indices(),
                    miller_indices=result_observations_original_index.indices())
                exp_reflections_match_results = exp_reflections.select(
                    match_original_indices.pairs().column(0))
                assert (exp_reflections_match_results['intensity.sum.value'] ==
                        result_observations_original_index.data()).count(False) == 0
                new_exp_reflections['intensity.sum.value.unmodified'] = \
                    exp_reflections_match_results['intensity.sum.value.unmodified']
                new_exp_reflections['intensity.sum.variance.unmodified'] = \
                    exp_reflections_match_results['intensity.sum.variance.unmodified']

                new_reflections.extend(new_exp_reflections)

        # report rejected experiments, reflections
        experiments_rejected_by_postrefinement = len(experiments) - len(new_experiments)
        reflections_rejected_by_postrefinement = reflections.size() - new_reflections.size()

        self.logger.log("Experiments rejected by post-refinement: %d" %
                        experiments_rejected_by_postrefinement)
        self.logger.log("Reflections rejected by post-refinement: %d" %
                        reflections_rejected_by_postrefinement)

        all_reasons = []
        for reason, count in six.iteritems(experiments_rejected_by_reason):
            self.logger.log("Experiments rejected due to %s: %d" %
                            (reason, count))
            all_reasons.append(reason)

        comm = self.mpi_helper.comm
        MPI = self.mpi_helper.MPI

        # Collect all rejection reasons from all ranks. Use allreduce to let each rank have all reasons.
        all_reasons = comm.allreduce(all_reasons, MPI.SUM)
        all_reasons = set(all_reasons)

        # Now that each rank has all reasons from all ranks, we can treat the reasons in a uniform way.
        total_experiments_rejected_by_reason = {}
        for reason in all_reasons:
            rejected_experiment_count = 0
            if reason in experiments_rejected_by_reason:
                rejected_experiment_count = experiments_rejected_by_reason[
                    reason]
            total_experiments_rejected_by_reason[reason] = comm.reduce(
                rejected_experiment_count, MPI.SUM, 0)

        total_accepted_experiment_count = comm.reduce(len(new_experiments),
                                                      MPI.SUM, 0)

        # how many reflections have we rejected due to post-refinement?
        rejected_reflections = len(reflections) - len(new_reflections)
        total_rejected_reflections = self.mpi_helper.sum(rejected_reflections)

        if self.mpi_helper.rank == 0:
            for reason, count in six.iteritems(
                    total_experiments_rejected_by_reason):
                self.logger.main_log(
                    "Total experiments rejected due to %s: %d" %
                    (reason, count))
            self.logger.main_log("Total experiments accepted: %d" %
                                 total_accepted_experiment_count)
            self.logger.main_log(
                "Total reflections rejected due to post-refinement: %d" %
                total_rejected_reflections)

        self.logger.log_step_time("POSTREFINEMENT", True)

        # Do we have any data left?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(new_experiments, new_reflections)

        return new_experiments, new_reflections
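simple_weighted_correlation above supplies the initial scale estimate (SWC.slope) and the observed-vs-reference correlation (SWC.corr), with weights zeroed for invalid model reflections. A numpy sketch of one plausible weighted form of those two quantities; the exact cctbx routine may differ in detail:

import numpy as np

def weighted_slope_and_corr(w, x, y):
    # weighted least-squares slope of y vs. x, plus the weighted correlation coefficient
    w, x, y = (np.asarray(v, dtype=float) for v in (w, x, y))
    sw = w.sum()
    mx, my = (w * x).sum() / sw, (w * y).sum() / sw
    cov = (w * (x - mx) * (y - my)).sum() / sw
    var_x = (w * (x - mx) ** 2).sum() / sw
    var_y = (w * (y - my) ** 2).sum() / sw
    return cov / var_x, cov / np.sqrt(var_x * var_y)

# the zero weight drops the outlying fourth observation from the fit
slope, corr = weighted_slope_and_corr([1, 1, 1, 0], [1, 2, 3, 4], [2.1, 3.9, 6.2, 99.0])
print("slope %.3f, correlation %.3f" % (slope, corr))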
Example #7
File: cosym.py  Project: dials/cctbx
    def run(self, input_experiments, input_reflections):
        from collections import OrderedDict
        if self.mpi_helper.rank == 0:
            print("Starting cosym worker")
            #Overall = Profiler("Cosym total time")

        #  Evenly distribute all experiments from mpi_helper ranks
        reports = self.mpi_helper.comm.gather(
            (len(input_experiments)),
            root=0)  # report from all ranks on experiment count
        if self.mpi_helper.rank == 0:
            from xfel.merging.application.modify.token_passing_left_right import construct_src_to_dst_plan
            plan = construct_src_to_dst_plan(
                flex.int(reports), self.params.modify.cosym.tranch_size,
                self.mpi_helper.comm)
        else:
            plan = 0
        plan = self.mpi_helper.comm.bcast(plan, root=0)
        dst_offset = 1 if self.mpi_helper.size > 1 else 0  # decision whether to reserve rank 0 for parallel anchor determination
        # FIXME XXX probably need to look at plan size to decide dst_offset or not
        from xfel.merging.application.modify.token_passing_left_right import apply_all_to_all
        tokens = apply_all_to_all(plan=plan,
                                  dst_offset=dst_offset,
                                  value=(input_experiments, input_reflections),
                                  comm=self.mpi_helper.comm)

        if self.params.modify.cosym.anchor:
            if self.mpi_helper.rank == 0:
                MIN_ANCHOR = 20
                from xfel.merging.application.modify.token_passing_left_right import construct_anchor_src_to_dst_plan
                anchor_plan = construct_anchor_src_to_dst_plan(
                    MIN_ANCHOR, flex.int(reports),
                    self.params.modify.cosym.tranch_size, self.mpi_helper.comm)
            else:
                anchor_plan = 0
            anchor_plan = self.mpi_helper.comm.bcast(anchor_plan, root=0)
        self.logger.log_step_time("COSYM")

        if self.params.modify.cosym.plot.interactive:
            self.params.modify.cosym.plot.filename = None

        has_tokens = len(tokens) > 0
        all_has_tokens = self.mpi_helper.comm.allgather(has_tokens)
        ranks_with_tokens = [
            i for (i, val) in enumerate(all_has_tokens) if val
        ]
        ranks_to_plot = ranks_with_tokens[:self.params.modify.cosym.plot.n_max]
        do_plot = (self.params.modify.cosym.plot.do_plot
                   and self.mpi_helper.rank in ranks_to_plot)

        if len(tokens) > 0:  # Only select ranks that have been assigned tranch data, for mutual coset determination
            # because cosym has a problem with hashed identifiers, use simple experiment identifiers
            sampling_experiments_for_cosym = ExperimentList()
            sampling_reflections_for_cosym = []  # a list of flex.reflection_table
            COSYM = self.task_c(self.params,
                                self.mpi_helper,
                                self.logger,
                                tokens,
                                sampling_experiments_for_cosym,
                                sampling_reflections_for_cosym,
                                communicator_size=self.mpi_helper.size,
                                do_plot=do_plot)
            self.uuid_cache = COSYM.uuid_cache  # reformed uuid list after n_refls filter

            rank_N_refl = flex.double([r.size() for r in COSYM.reflections])
            message = """Task 1. Prepare the data for cosym
    change_of_basis_ops_to_minimum_cell
    eliminate_sys_absent
    transform models into Miller arrays, putting data in primitive triclinic reduced cell
    There are %d experiments with %d reflections, averaging %.1f reflections/experiment""" % (
                len(COSYM.experiments), flex.sum(rank_N_refl),
                flex.mean(rank_N_refl))
            self.logger.log(message)
            if self.mpi_helper.rank == 1:
                print(message)  #; P = Timer("COSYM.run")
            COSYM.run()
            #if self.mpi_helper.rank == 1: del P

            keyval = [("experiment", []), ("reindex_op", []), ("coset", [])]
            raw = OrderedDict(keyval)

            if self.mpi_helper.rank == 0:
                print("Rank", self.mpi_helper.rank, "experiments:",
                      len(sampling_experiments_for_cosym))

            for sidx in range(len(self.uuid_cache)):
                raw["experiment"].append(self.uuid_cache[sidx])

                sidx_plus = sidx

                try:
                    minimum_to_input = COSYM.cb_op_to_minimum[
                        sidx_plus].inverse()
                except Exception as e:
                    print("raising", e, sidx_plus, len(COSYM.cb_op_to_minimum))
                    raise e

                reindex_op = minimum_to_input * \
                             sgtbx.change_of_basis_op(COSYM.cosym_analysis.reindexing_ops[sidx_plus]) * \
                             COSYM.cb_op_to_minimum[sidx_plus]

                # Keep this block even though not currently used; need for future assertions:
                LG = COSYM.cosym_analysis.target._lattice_group
                LGINP = LG.change_basis(
                    COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(
                        minimum_to_input)
                SG = COSYM.cosym_analysis.input_space_group
                SGINP = SG.change_basis(
                    COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(
                        minimum_to_input)
                CO = sgtbx.cosets.left_decomposition(LGINP, SGINP)
                partitions = CO.partitions
                this_reindex_op = reindex_op.as_hkl()
                this_coset = None
                for p_no, partition in enumerate(partitions):
                    partition_ops = [
                        change_of_basis_op(ip).as_hkl() for ip in partition
                    ]
                    if this_reindex_op in partition_ops:
                        this_coset = p_no
                        break
                assert this_coset is not None
                raw["coset"].append(this_coset)
                raw["reindex_op"].append(this_reindex_op)

            keys = list(raw.keys())
            from pandas import DataFrame as df
            data = df(raw)
            # major assumption is that all the coset decompositions "CO" are the same.  NOT sure if a test is needed.
            reports = self.mpi_helper.comm.gather((data, CO), root=0)
        else:
            reports = self.mpi_helper.comm.gather(None, root=0)
        if self.mpi_helper.rank == 0:
            # report back to rank==0 and reconcile all coset assignments
            while None in reports:
                reports.pop(reports.index(None))
            # global CO
            global_coset_decomposition = reports[0][1]  # again, assuming here they are all the same XXX
        else:
            global_coset_decomposition = 0
        global_coset_decomposition = self.mpi_helper.comm.bcast(
            global_coset_decomposition, root=0)
        partitions = global_coset_decomposition.partitions
        self.mpi_helper.comm.barrier()
        # end of distributed embedding

        if self.params.modify.cosym.anchor:
            anchor_tokens = apply_all_to_all(plan=anchor_plan,
                                             dst_offset=0,
                                             value=(input_experiments,
                                                    input_reflections),
                                             comm=self.mpi_helper.comm)

        if self.mpi_helper.rank == 0:
            from xfel.merging.application.modify.df_cosym import reconcile_cosym_reports
            REC = reconcile_cosym_reports(reports)
            results = REC.composite_tranch_merge(voting_method="consensus")

            # at this point we have the opportunity to reconcile the results with an anchor
            # recycle the data structures for anchor determination
            if self.params.modify.cosym.anchor:
                sampling_experiments_for_cosym, sampling_reflections_for_cosym = self.task_a(
                    self.params)
                ANCHOR = self.task_c(
                    self.params,
                    self.mpi_helper,
                    self.logger,
                    anchor_tokens,
                    sampling_experiments_for_cosym,
                    sampling_reflections_for_cosym,
                    uuid_starting=["anchor structure"],
                    communicator_size=1)  # only run on the rank==0 tranch.
                self.uuid_cache = ANCHOR.uuid_cache  # reformed uuid list after n_refls filter
                #P = Timer("ANCHOR.run")
                ANCHOR.run(
                )  # Future redesign XXX FIXME do this in rank 0 in parallel with distributed composite tranches
                #del P

                keyval = [("experiment", []), ("coset", [])]
                raw = OrderedDict(keyval)
                print("Anchor", "experiments:",
                      len(sampling_experiments_for_cosym))

                anchor_op = ANCHOR.cb_op_to_minimum[0].inverse() * \
                           sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[0]) * \
                           ANCHOR.cb_op_to_minimum[0]
                anchor_coset = None
                for p_no, partition in enumerate(partitions):
                    partition_ops = [
                        change_of_basis_op(ip).as_hkl() for ip in partition
                    ]
                    if anchor_op.as_hkl() in partition_ops:
                        anchor_coset = p_no
                        break
                assert anchor_coset is not None
                print("The consensus for the anchor is", anchor_op.as_hkl(),
                      " anchor coset", anchor_coset)

                raw["experiment"].append("anchor structure")
                raw["coset"].append(anchor_coset)
                for sidx in range(1, len(self.uuid_cache)):
                    raw["experiment"].append(self.uuid_cache[sidx])

                    sidx_plus = sidx

                    minimum_to_input = ANCHOR.cb_op_to_minimum[
                        sidx_plus].inverse()
                    reindex_op = minimum_to_input * \
                             sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[sidx_plus]) * \
                             ANCHOR.cb_op_to_minimum[sidx_plus]
                    this_reindex_op = reindex_op.as_hkl()
                    this_coset = None
                    for p_no, partition in enumerate(partitions):
                        partition_ops = [
                            change_of_basis_op(ip).as_hkl() for ip in partition
                        ]
                        if this_reindex_op in partition_ops:
                            this_coset = p_no
                            break
                    assert this_coset is not None
                    raw["coset"].append(this_coset)

                from pandas import DataFrame as df
                anchor_data = df(raw)
                REC.reconcile_with_anchor(results, anchor_data, anchor_op)
                # no need for return value; results dataframe is modified in place

            if self.params.modify.cosym.dataframe:
                import os
                results.to_pickle(
                    path=os.path.join(self.params.output.output_dir,
                                      self.params.modify.cosym.dataframe))
            transmitted = results
        else:
            transmitted = 0
        self.mpi_helper.comm.barrier()
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        # "transmitted" holds the global coset assignments

        #subselect expt and refl on the successful coset assignments
        # output:  experiments-->result_experiments_for_cosym; reflections-->reflections (modified in place)
        result_experiments_for_cosym = ExperimentList()
        good_refls = flex.bool(len(input_reflections), False)
        good_expt_id = list(transmitted["experiment"])
        good_coset = list(transmitted["coset"])  # would like to understand how to use pandas rather than Python list
        for iexpt in range(len(input_experiments)):
            iexpt_id = input_experiments[iexpt].identifier
            keepit = iexpt_id in good_expt_id
            if keepit:
                this_coset = good_coset[good_expt_id.index(iexpt_id)]
                this_cb_op = change_of_basis_op(
                    global_coset_decomposition.partitions[this_coset][0])
                accepted_expt = input_experiments[iexpt]
                if this_coset > 0:
                    accepted_expt.crystal = MosaicCrystalSauter2014(
                        accepted_expt.crystal.change_basis(this_cb_op))
                    # need to use wrapper because of cctbx/dxtbx#5
                result_experiments_for_cosym.append(accepted_expt)
                good_refls |= input_reflections["exp_id"] == iexpt_id
        selected_reflections = input_reflections.select(
            good_refls)  # XXX is this in place (double check)
        self.mpi_helper.comm.barrier()

        # still have to reindex the reflection table, but try to do it efficiently
        from xfel.merging.application.modify.reindex_cosym import reindex_refl_by_coset
        if (len(result_experiments_for_cosym) > 0):
            reindex_refl_by_coset(
                refl=selected_reflections,
                data=transmitted,
                symms=[
                    E.crystal.get_crystal_symmetry()
                    for E in result_experiments_for_cosym
                ],
                uuids=[E.identifier for E in result_experiments_for_cosym],
                co=global_coset_decomposition,
                anomalous_flag=self.params.merging.merge_anomalous == False,
                verbose=False)
        # this should have re-indexed the refls in place, no need for return value

        self.mpi_helper.comm.barrier()
        # Note: this handles the simple case of lattice ambiguity (P63 in P/mmm lattice group);
        # in this use case we assume all inputs and outputs are in P63.
        # More complex use cases would have to reset the space group in the crystal and recalculate
        # the ASU "miller_indices" in the reflections table.

        self.logger.log_step_time("COSYM", True)
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(result_experiments_for_cosym,
                                        selected_reflections)
        return result_experiments_for_cosym, selected_reflections
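The reconciliation step above gathers a (DataFrame, coset decomposition) report from every rank that received a tranch and None from the rest, then drops the empty entries on rank 0 before merging. A minimal mpi4py sketch of that gather-and-filter pattern:

from mpi4py import MPI

comm = MPI.COMM_WORLD
has_data = comm.Get_rank() % 2 == 0                  # stand-in for "this rank was assigned a tranch"
report = {"rank": comm.Get_rank()} if has_data else None

reports = comm.gather(report, root=0)
if comm.Get_rank() == 0:
    reports = [r for r in reports if r is not None]  # keep only ranks that sent data
    print("reconciling %d non-empty reports" % len(reports))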
Example #8
    def run(self, experiments, reflections):
        if 'unit_cell' not in self.params.filter.algorithm:  # so far only "unit_cell" algorithm is supported
            return experiments, reflections

        self.logger.log_step_time("FILTER_EXPERIMENTS")

        # If the filter unit cell and/or space group params are Auto, use the corresponding scaling targets.
        if self.params.filter.unit_cell.value.target_unit_cell == Auto:
            self.params.filter.unit_cell.value.target_unit_cell = self.params.scaling.unit_cell
        if self.params.filter.unit_cell.value.target_space_group == Auto:
            self.params.filter.unit_cell.value.target_space_group = self.params.scaling.space_group

        self.logger.log(
            "Using filter target unit cell: %s" %
            str(self.params.filter.unit_cell.value.target_unit_cell))
        self.logger.log(
            "Using filter target space group: %s" %
            str(self.params.filter.unit_cell.value.target_space_group))

        experiment_ids_to_remove = []
        removed_for_unit_cell = 0
        removed_for_space_group = 0
        for experiment in experiments:
            if not self.check_space_group(experiment):
                experiment_ids_to_remove.append(experiment.identifier)
                removed_for_space_group += 1
            elif not self.check_unit_cell(experiment):
                experiment_ids_to_remove.append(experiment.identifier)
                removed_for_unit_cell += 1

        new_experiments, new_reflections = experiment_filter.remove_experiments(
            experiments, reflections, experiment_ids_to_remove)

        removed_reflections = len(reflections) - len(new_reflections)
        assert removed_for_space_group + removed_for_unit_cell == \
            len(experiments) - len(new_experiments)

        self.logger.log(
            "Experiments rejected because of unit cell dimensions: %d" %
            removed_for_unit_cell)
        self.logger.log("Experiments rejected because of space group %d" %
                        removed_for_space_group)
        self.logger.log(
            "Reflections rejected because of rejected experiments: %d" %
            removed_reflections)

        # MPI-reduce total counts
        comm = self.mpi_helper.comm
        MPI = self.mpi_helper.MPI
        total_removed_for_unit_cell = comm.reduce(removed_for_unit_cell,
                                                  MPI.SUM, 0)
        total_removed_for_space_group = comm.reduce(removed_for_space_group,
                                                    MPI.SUM, 0)
        total_reflections_removed = comm.reduce(removed_reflections, MPI.SUM,
                                                0)

        # rank 0: log total counts
        if self.mpi_helper.rank == 0:
            self.logger.main_log(
                "Total experiments rejected because of unit cell dimensions: %d"
                % total_removed_for_unit_cell)
            self.logger.main_log(
                "Total experiments rejected because of space group %d" %
                total_removed_for_space_group)
            self.logger.main_log(
                "Total reflections rejected because of rejected experiments %d"
                % total_reflections_removed)

        self.logger.log_step_time("FILTER_EXPERIMENTS", True)

        # Do we have any data left?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(new_experiments, new_reflections)

        return new_experiments, new_reflections
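check_unit_cell and check_space_group, called above, are not shown in this excerpt. A hypothetical by-value cell check of the kind this filter needs: accept a cell only if its lengths are within a relative tolerance of the target and its angles within an absolute one (the tolerance names and defaults here are illustrative, not the worker's phil parameters):

def cell_matches(cell, target, rel_length_tol=0.10, abs_angle_tol=2.0):
    # cell and target are (a, b, c, alpha, beta, gamma) tuples
    lengths_ok = all(abs(x - t) / t <= rel_length_tol
                     for x, t in zip(cell[:3], target[:3]))
    angles_ok = all(abs(x - t) <= abs_angle_tol
                    for x, t in zip(cell[3:], target[3:]))
    return lengths_ok and angles_ok

print(cell_matches((78.4, 78.1, 37.1, 90, 90, 90), (78.0, 78.0, 37.0, 90, 90, 90)))  # True
print(cell_matches((90.0, 78.1, 37.1, 90, 90, 90), (78.0, 78.0, 37.0, 90, 90, 90)))  # False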
Example #9
  def run(self, experiments, reflections):
    if 'unit_cell' not in self.params.filter.algorithm: # so far only "unit_cell" algorithm is supported
      return experiments, reflections

    self.logger.log_step_time("FILTER_EXPERIMENTS")

    experiment_ids_to_remove = []
    removed_for_unit_cell = 0
    removed_for_space_group = 0

# BEGIN BY-VALUE FILTER
    if self.params.filter.unit_cell.algorithm == "value":
    # If the filter unit cell and/or space group params are Auto, use the corresponding scaling targets.
      if self.params.filter.unit_cell.value.target_unit_cell == Auto:
        if self.params.scaling.unit_cell is None:
          try:
            self.params.filter.unit_cell.value.target_unit_cell = self.params.statistics.average_unit_cell
          except AttributeError:
            pass
        else:
          self.params.filter.unit_cell.value.target_unit_cell = self.params.scaling.unit_cell
      if self.params.filter.unit_cell.value.target_space_group == Auto:
        self.params.filter.unit_cell.value.target_space_group = self.params.scaling.space_group

      self.logger.log("Using filter target unit cell: %s"%str(self.params.filter.unit_cell.value.target_unit_cell))
      self.logger.log("Using filter target space group: %s"%str(self.params.filter.unit_cell.value.target_space_group))

      for experiment in experiments:
        if not self.check_space_group(experiment):
          experiment_ids_to_remove.append(experiment.identifier)
          removed_for_space_group += 1
        elif not self.check_unit_cell(experiment):
          experiment_ids_to_remove.append(experiment.identifier)
          removed_for_unit_cell += 1
# END BY-VALUE FILTER
    elif self.params.filter.unit_cell.algorithm == "cluster":

      from uc_metrics.clustering.util import get_population_permutation # implicit import
      import pickle

      class Empty: pass
      if self.mpi_helper.rank == 0:
        with open(self.params.filter.unit_cell.cluster.covariance.file,'rb') as F:
          data = pickle.load(F)
          E=Empty()
          E.features_ = data["features"]
          E.sample_name = data["sample"]
          E.output_info = data["info"]
          pop=data["populations"]
          self.logger.main_log("Focusing on cluster component %d from previous analysis of %d cells"%(
            self.params.filter.unit_cell.cluster.covariance.component, len(pop.labels)))
          self.logger.main_log("%s noise %d order %s"%(pop.populations, pop.n_noise_, pop.main_components))

          legend = pop.basic_covariance_compact_report(feature_vectors=E).getvalue()
          self.logger.main_log(legend)
          self.logger.main_log("Applying Mahalanobis cutoff of %.3f"%(self.params.filter.unit_cell.cluster.covariance.mahalanobis))
        transmitted = data
      else:
        transmitted = None
      # distribute cluster information to all ranks
      self.cluster_data = self.mpi_helper.comm.bcast(transmitted, root=0)
      # pull out the index numbers of the unit cell parameters to be used for covariance matrix
      self.cluster_data["idxs"]=[["a","b","c","alpha","beta","gamma"].index(F) for F in self.cluster_data["features"]]

      for experiment in experiments:
        if not self.check_cluster(experiment):
          experiment_ids_to_remove.append(experiment.identifier)
          removed_for_unit_cell += 1
# END OF COVARIANCE FILTER
    new_experiments, new_reflections = experiment_filter.remove_experiments(experiments, reflections, experiment_ids_to_remove)

    removed_reflections = len(reflections) - len(new_reflections)
    assert removed_for_space_group + removed_for_unit_cell == len(experiments) - len(new_experiments)

    self.logger.log("Experiments rejected because of unit cell dimensions: %d"%removed_for_unit_cell)
    self.logger.log("Experiments rejected because of space group %d"%removed_for_space_group)
    self.logger.log("Reflections rejected because of rejected experiments: %d"%removed_reflections)

    # MPI-reduce total counts
    comm = self.mpi_helper.comm
    MPI = self.mpi_helper.MPI
    total_removed_for_unit_cell  = comm.reduce(removed_for_unit_cell, MPI.SUM, 0)
    total_removed_for_space_group  = comm.reduce(removed_for_space_group, MPI.SUM, 0)
    total_reflections_removed  = comm.reduce(removed_reflections, MPI.SUM, 0)

    # rank 0: log total counts
    if self.mpi_helper.rank == 0:
      self.logger.main_log("Total experiments rejected because of unit cell dimensions: %d"%total_removed_for_unit_cell)
      self.logger.main_log("Total experiments rejected because of space group %d"%total_removed_for_space_group)
      self.logger.main_log("Total reflections rejected because of rejected experiments %d"%total_reflections_removed)

    self.logger.log_step_time("FILTER_EXPERIMENTS", True)

    # Do we have any data left?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(new_experiments, new_reflections)

    return new_experiments, new_reflections
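The cluster branch above applies a Mahalanobis-distance cutoff against a covariance model read from the uc_metrics pickle. A numpy sketch of that distance test with an invented cluster mean and covariance (the real feature vectors, component selection, and cutoff come from the pickled analysis and the phil parameters):

import numpy as np

cluster_mean = np.array([78.0, 78.0, 37.0])   # assumed cluster centre over (a, b, c)
cluster_cov = np.diag([0.25, 0.25, 0.09])     # assumed cluster covariance
cov_inv = np.linalg.inv(cluster_cov)
mahalanobis_cutoff = 4.0

def accept_cell(cell_abc):
    d = np.asarray(cell_abc, dtype=float) - cluster_mean
    return float(np.sqrt(d @ cov_inv @ d)) <= mahalanobis_cutoff

print(accept_cell([78.3, 77.8, 37.1]))   # True: close to the cluster centre
print(accept_cell([80.5, 80.5, 38.0]))   # False: well outside the cutoff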
Example #10
    def run(self, all_experiments, all_reflections):
        """ Load all the data using MPI """
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex

        # Both must be none or not none
        test = [all_experiments is None, all_reflections is None].count(True)
        assert test in [0, 2]
        if test == 2:
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0
        else:
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        self.logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % (starting_expts_count, starting_refls_count))

        # Generate and send a list of file paths to each worker
        if self.mpi_helper.rank == 0:
            file_list = self.get_list()
            self.logger.log(
                "Built an input list of %d json/pickle file pairs" %
                (len(file_list)))
            self.params.input.path = None  # Rank 0 has already parsed the input parameters

            # optionally write a file list mapping to disk, useful in post processing if save_experiments_and_reflections=True
            file_id_from_names = None
            if self.params.output.expanded_bookkeeping:
                apath = os.path.abspath
                file_names_from_id = {
                    i_f: tuple(map(apath, exp_ref_pair))
                    for i_f, exp_ref_pair in enumerate(file_list)
                }
                with open(
                        os.path.join(self.params.output.output_dir,
                                     "file_list_map.json"), "w") as o:
                    json.dump(file_names_from_id, o)
                file_id_from_names = {
                    tuple(map(apath, exp_ref_pair)): i_f
                    for i_f, exp_ref_pair in enumerate(file_list)
                }

            per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
                                    calculate_file_load(available_rank_count = self.mpi_helper.size)
            self.logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                (len(per_rank_file_list)))
            transmitted = per_rank_file_list, file_id_from_names
        else:
            transmitted = None

        self.logger.log_step_time("BROADCAST_FILE_LIST")
        new_file_list, file_names_mapping = self.mpi_helper.comm.bcast(
            transmitted, root=0)
        new_file_list = (new_file_list[self.mpi_helper.rank]
                         if self.mpi_helper.rank < len(new_file_list) else None)
        self.logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Load the data
        self.logger.log_step_time("LOAD")
        if new_file_list is not None:
            self.logger.log("Received a list of %d json/pickle file pairs" %
                            len(new_file_list))
            for experiments_filename, reflections_filename in new_file_list:
                self.logger.log("Reading %s %s" %
                                (experiments_filename, reflections_filename))
                experiments = ExperimentListFactory.from_json_file(
                    experiments_filename,
                    check_format=self.params.input.read_image_headers)
                reflections = flex.reflection_table.from_file(
                    reflections_filename)
                if self.params.output.expanded_bookkeeping:
                    # NOTE: these are un-prunable
                    reflections["input_refl_index"] = flex.int(
                        list(range(len(reflections))))
                    reflections["orig_exp_id"] = reflections['id']
                    assert file_names_mapping is not None
                    exp_ref_pair = os.path.abspath(
                        experiments_filename), os.path.abspath(
                            reflections_filename)
                    this_refl_fileMappings = [
                        file_names_mapping[exp_ref_pair]
                    ] * len(reflections)
                    reflections["file_list_mapping"] = flex.int(
                        this_refl_fileMappings)
                self.logger.log("Data read, prepping")

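                # keep pristine copies of the raw intensity columns; multiplying a flex array by 1
                # yields a new array, so later modifications leave the *.unmodified values intact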
                if 'intensity.sum.value' in reflections:
                    reflections[
                        'intensity.sum.value.unmodified'] = reflections[
                            'intensity.sum.value'] * 1
                if 'intensity.sum.variance' in reflections:
                    reflections[
                        'intensity.sum.variance.unmodified'] = reflections[
                            'intensity.sum.variance'] * 1

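                # Rebuild the reflection id columns from scratch: the table read from disk may carry a
                # stale integer-id -> identifier map, so it is cleared here and repopulated per experiment below.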
                new_ids = flex.int(len(reflections), -1)
                new_identifiers = flex.std_string(len(reflections))
                eid = reflections.experiment_identifiers()
                for k in eid.keys():
                    del eid[k]

                if self.params.output.expanded_bookkeeping:
                    preGen_experiment_identifiers(experiments,
                                                  experiments_filename)
                for experiment_id, experiment in enumerate(experiments):
                    # select reflections of the current experiment
                    refls_sel = reflections['id'] == experiment_id

                    if refls_sel.count(True) == 0: continue

                    if experiment.identifier is None or len(
                            experiment.identifier) == 0:
                        experiment.identifier = create_experiment_identifier(
                            experiment, experiments_filename, experiment_id)

                    if not self.params.input.keep_imagesets:
                        experiment.imageset = None
                    all_experiments.append(experiment)

                    # Reflection experiment 'id' is unique within this rank; 'exp_id' (i.e. experiment identifier) is unique globally
                    new_identifiers.set_selected(refls_sel,
                                                 experiment.identifier)

                    new_id = len(all_experiments) - 1
                    eid[new_id] = experiment.identifier
                    new_ids.set_selected(refls_sel, new_id)
                assert (new_ids < 0
                        ).count(True) == 0, "Not all reflections accounted for"
                reflections['id'] = new_ids
                reflections['exp_id'] = new_identifiers
                all_reflections.extend(reflections)
        else:
            self.logger.log("Received a list of 0 json/pickle file pairs")
        self.logger.log_step_time("LOAD", True)

        self.logger.log('Read %d experiments consisting of %d reflections' %
                        (len(all_experiments) - starting_expts_count,
                         len(all_reflections) - starting_refls_count))
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        all_reflections = self.prune_reflection_table_keys(all_reflections)

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(all_experiments, all_reflections)
        return all_experiments, all_reflections
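The loader above renumbers reflection ids so they are consecutive within the rank and re-registers the string experiment identifiers. A toy sketch of that remapping on a small reflection table (assumes a working DIALS installation; the identifiers and offsets are made up for illustration):

    from dials.array_family import flex

    refl = flex.reflection_table()
    refl['id'] = flex.int([0, 0, 1, 1, 1])            # ids as read from one input file

    identifiers = {0: "aaaa-1111", 1: "bbbb-2222"}    # hypothetical experiment identifiers
    new_ids = flex.int(len(refl), -1)
    new_identifiers = flex.std_string(len(refl))
    eid = refl.experiment_identifiers()

    for old_id, identifier in identifiers.items():
        sel = refl['id'] == old_id
        new_id = old_id + 10                          # stand-in for len(all_experiments) - 1
        new_ids.set_selected(sel, new_id)
        new_identifiers.set_selected(sel, identifier)
        eid[new_id] = identifier                      # map integer id -> string identifier

    assert (new_ids < 0).count(True) == 0, "Not all reflections accounted for"
    refl['id'] = new_ids
    refl['exp_id'] = new_identifiers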
Example #11
    def run(self, all_experiments, all_reflections):
        """ Load all the data using MPI """
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex

        # Both must be None, or both must be not None
        test = [all_experiments is None, all_reflections is None].count(True)
        assert test in [0, 2]
        if test == 2:
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0
        else:
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        self.logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % (starting_expts_count, starting_refls_count))

        # Generate and send a list of file paths to each worker
        if self.mpi_helper.rank == 0:
            file_list = self.get_list()
            self.logger.log(
                "Built an input list of %d json/pickle file pairs" %
                (len(file_list)))
            self.params.input.path = None  # Rank 0 has already parsed the input parameters
            per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
                                    calculate_file_load(available_rank_count = self.mpi_helper.size)
            self.logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                (len(per_rank_file_list)))
            transmitted = per_rank_file_list
        else:
            transmitted = None

        self.logger.log_step_time("BROADCAST_FILE_LIST")
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        new_file_list = (transmitted[self.mpi_helper.rank]
                         if self.mpi_helper.rank < len(transmitted) else None)
        self.logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Load the data
        self.logger.log_step_time("LOAD")
        if new_file_list is not None:
            self.logger.log("Received a list of %d json/pickle file pairs" %
                            len(new_file_list))
            for experiments_filename, reflections_filename in new_file_list:
                experiments = ExperimentListFactory.from_json_file(
                    experiments_filename, check_format=False)
                reflections = flex.reflection_table.from_file(
                    reflections_filename)

                for experiment_id, experiment in enumerate(experiments):
                    if experiment.identifier is None or len(
                            experiment.identifier) == 0:
                        experiment.identifier = create_experiment_identifier(
                            experiment, experiments_filename, experiment_id)
                    all_experiments.append(experiment)
                    #experiment.identifier = "%d"%(len(all_experiments) - 1)

                    # select reflections of the current experiment
                    refls = reflections.select(
                        reflections['id'] == experiment_id)

                    # Reflection experiment 'id' is supposed to be unique within this rank; 'exp_id' (i.e. experiment identifier) is supposed to be unique globally
                    #refls['id'] = flex.size_t(len(refls), len(all_experiments)-1)
                    refls['exp_id'] = flex.std_string(len(refls),
                                                      experiment.identifier)

                    all_reflections.extend(refls)
        else:
            self.logger.log("Received a list of 0 json/pickle file pairs")
        self.logger.log_step_time("LOAD", True)

        self.logger.log('Read %d experiments consisting of %d reflections' %
                        (len(all_experiments) - starting_expts_count,
                         len(all_reflections) - starting_refls_count))
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        from xfel.merging.application.reflection_table_utils import reflection_table_utils
        all_reflections = reflection_table_utils.prune_reflection_table_keys(
            reflections=all_reflections,
            keys_to_keep=[
                'intensity.sum.value', 'intensity.sum.variance',
                'miller_index', 'miller_index_asymmetric', 'exp_id', 's1'
            ])
        self.logger.log("Pruned reflection table")
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(all_experiments, all_reflections)

        return all_experiments, all_reflections
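Rank 0 builds one sublist of file pairs per rank, broadcasts the whole structure, and every rank then picks the entry at its own index (or nothing, if there are more ranks than sublists). A stripped-down sketch of that distribution pattern with mpi4py (the file names are placeholders, not real inputs):

    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    if rank == 0:
        # one sublist per rank; in the worker this comes from file_load_calculator
        per_rank_file_list = [[("expts_%d.json" % i, "refls_%d.pickle" % i)]
                              for i in range(comm.Get_size())]
        transmitted = per_rank_file_list
    else:
        transmitted = None

    transmitted = comm.bcast(transmitted, root=0)
    my_files = transmitted[rank] if rank < len(transmitted) else None
    print("rank", rank, "will load", my_files)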
Example #12
    def run(self, experiments, reflections):

        assert self.mpi_helper.size not in [2,3,4], "Please run modify_cosym on " \
            "1 or >= 5 MPI ranks."

        self.logger.log_step_time("COSYM")

        all_sampling_experiments = experiments
        all_sampling_reflections = reflections
        # because cosym has a problem with hashed identifiers, use simple experiment identifiers
        from dxtbx.model.experiment_list import ExperimentList
        sampling_experiments_for_cosym = ExperimentList()
        sampling_reflections_for_cosym = []  # a list of flex.reflection_table

        def task_a():
            # add an anchor
            if self.params.modify.cosym.anchor:
                from xfel.merging.application.model.crystal_model import crystal_model
                XM = crystal_model(params=self.params, purpose="cosym")
                model_intensities = XM.run([], [])
                from dxtbx.model import Experiment, Crystal
                from scitbx.matrix import sqr
                O = sqr(model_intensities.unit_cell().orthogonalization_matrix(
                )).transpose().elems
                real_a = (O[0], O[1], O[2])
                real_b = (O[3], O[4], O[5])
                real_c = (O[6], O[7], O[8])
                nc = Crystal(real_a, real_b, real_c,
                             model_intensities.space_group())
                sampling_experiments_for_cosym.append(
                    Experiment(crystal=nc)
                )  # prepends the reference model to the cosym E-list
                from dials.array_family import flex

                exp_reflections = flex.reflection_table()
                exp_reflections[
                    'intensity.sum.value'] = model_intensities.data()
                exp_reflections['intensity.sum.variance'] = flex.pow(
                    model_intensities.sigmas(), 2)
                exp_reflections['miller_index'] = model_intensities.indices()
                exp_reflections[
                    'miller_index_asymmetric'] = model_intensities.indices()
                exp_reflections['flags'] = flex.size_t(
                    model_intensities.size(),
                    flex.reflection_table.flags.integrated_sum)

                # prepare individual reflection tables for each experiment

                simple_experiment_id = len(sampling_experiments_for_cosym) - 1
                #experiment.identifier = "%d"%simple_experiment_id
                sampling_experiments_for_cosym[
                    -1].identifier = "%d" % simple_experiment_id
                # the experiment identifier must be a string, according to the *.h file
                # the identifier is changed on the _for_cosym experiment list, not on the master experiment list used for the rest of the analysis

                exp_reflections['id'] = flex.int(len(exp_reflections),
                                                 simple_experiment_id)
                # register the integer id as a new column in the per-experiment reflection table

                exp_reflections.experiment_identifiers(
                )[simple_experiment_id] = sampling_experiments_for_cosym[
                    -1].identifier
                #apparently the reflection table holds a map from integer id (reflection table) to string id (experiment)

                sampling_reflections_for_cosym.append(exp_reflections)

        #if self.mpi_helper.rank == 0:
        # task_a() # no anchor for initial pass

        def task_1(uuid_starting=[], mpi_helper_size=1, do_plot=False):
            self.uuid_cache = uuid_starting
            if mpi_helper_size == 1:  # simple case, one rank
                for experiment in all_sampling_experiments:
                    sampling_experiments_for_cosym.append(experiment)
                    self.uuid_cache.append(experiment.identifier)

                    exp_reflections = all_sampling_reflections.select(
                        all_sampling_reflections['exp_id'] ==
                        experiment.identifier)
                    # prepare individual reflection tables for each experiment

                    simple_experiment_id = len(
                        sampling_experiments_for_cosym) - 1
                    #experiment.identifier = "%d"%simple_experiment_id
                    sampling_experiments_for_cosym[
                        -1].identifier = "%d" % simple_experiment_id
                    # the experiment identifier must be a string, according to the *.h file
                    # the identifier is changed on the _for_cosym experiment list, not on the master experiment list used for the rest of the analysis

                    exp_reflections['id'] = flex.int(len(exp_reflections),
                                                     simple_experiment_id)
                    # register the integer id as a new column in the per-experiment reflection table

                    exp_reflections.experiment_identifiers(
                    )[simple_experiment_id] = sampling_experiments_for_cosym[
                        -1].identifier
                    #apparently the reflection table holds a map from integer id (reflection table) to string id (experiment)

                    sampling_reflections_for_cosym.append(exp_reflections)
            else:  # complex case, overlap tranches for mutual coset determination
                self.mpi_helper.MPI.COMM_WORLD.barrier()
                from xfel.merging.application.modify.token_passing_left_right import token_passing_left_right
                values = token_passing_left_right((experiments, reflections))
                for tranch_experiments, tranch_reflections in values:
                    for experiment in tranch_experiments:
                        sampling_experiments_for_cosym.append(experiment)
                        self.uuid_cache.append(experiment.identifier)

                        exp_reflections = tranch_reflections.select(
                            tranch_reflections['exp_id'] ==
                            experiment.identifier)
                        # prepare individual reflection tables for each experiment

                        simple_experiment_id = len(
                            sampling_experiments_for_cosym) - 1
                        #experiment.identifier = "%d"%simple_experiment_id
                        sampling_experiments_for_cosym[
                            -1].identifier = "%d" % simple_experiment_id
                        # the experiment identifier must be a string, according to the *.h file
                        # the identifier is changed on the _for_cosym experiment list, not on the master experiment list used for the rest of the analysis

                        exp_reflections['id'] = flex.int(
                            len(exp_reflections), simple_experiment_id)
                        # register the integer id as a new column in the per-experiment reflection table

                        exp_reflections.experiment_identifiers(
                        )[simple_experiment_id] = sampling_experiments_for_cosym[
                            -1].identifier
                        #apparently the reflection table holds a map from integer id (reflection table) to string id (experiment)

                        sampling_reflections_for_cosym.append(exp_reflections)

            from dials.command_line import cosym as cosym_module
            cosym_module.logger = self.logger

            i_plot = self.mpi_helper.rank
            from xfel.merging.application.modify.aux_cosym import dials_cl_cosym_subclass as dials_cl_cosym_wrapper
            COSYM = dials_cl_cosym_wrapper(
                sampling_experiments_for_cosym,
                sampling_reflections_for_cosym,
                self.uuid_cache,
                params=self.params.modify.cosym,
                output_dir=self.params.output.output_dir,
                do_plot=do_plot,
                i_plot=i_plot)
            return COSYM

        if self.params.modify.cosym.plot.interactive:
            self.params.modify.cosym.plot.filename = None
        do_plot = (self.params.modify.cosym.plot.do_plot and
                   self.mpi_helper.rank < self.params.modify.cosym.plot.n_max)
        COSYM = task_1(mpi_helper_size=self.mpi_helper.size, do_plot=do_plot)
        self.uuid_cache = COSYM.uuid_cache  # reformed uuid list after n_refls filter

        import dials.algorithms.symmetry.cosym.target
        from xfel.merging.application.modify.aux_cosym import TargetWithFastRij
        dials.algorithms.symmetry.cosym.target.Target = TargetWithFastRij

        rank_N_refl = flex.double([r.size() for r in COSYM.reflections])
        message = """Task 1. Prepare the data for cosym
    change_of_basis_ops_to_minimum_cell
    eliminate_sys_absent
    transform models into Miller arrays, putting data in primitive triclinic reduced cell
    There are %d experiments with %d reflections, averaging %.1f reflections/experiment""" % (
            len(COSYM.experiments), flex.sum(rank_N_refl),
            flex.mean(rank_N_refl))
        self.logger.log(message)

        COSYM.run()

        from collections import OrderedDict
        # assert: len(sampling_experiments_for_cosym) (+1 if an anchor is present) == len(COSYM._experiments)
        keyval = [("experiment", []), ("reindex_op", []), ("coset", [])]
        raw = OrderedDict(keyval)
        print("Rank", self.mpi_helper.rank, "experiments:",
              len(sampling_experiments_for_cosym))

        for sidx in range(len(self.uuid_cache)):
            raw["experiment"].append(self.uuid_cache[sidx])

            sidx_plus = sidx

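            # The cosym reindexing op is expressed in the minimum-cell setting, so it is conjugated
            # back to the input setting: map input to minimum, reindex there, then map back to input.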
            minimum_to_input = COSYM.cb_op_to_minimum[sidx_plus].inverse()
            reindex_op = minimum_to_input * \
                           sgtbx.change_of_basis_op(COSYM.cosym_analysis.reindexing_ops[sidx_plus]) * \
                           COSYM.cb_op_to_minimum[sidx_plus]

            # Keep this block even though not currently used; need for future assertions:
            LG = COSYM.cosym_analysis.target._lattice_group
            LGINP = LG.change_basis(
                COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(
                    minimum_to_input)
            SG = COSYM.cosym_analysis.input_space_group
            SGINP = SG.change_basis(
                COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(
                    minimum_to_input)
            CO = sgtbx.cosets.left_decomposition(LGINP, SGINP)
            partitions = CO.partitions
            this_reindex_op = reindex_op.as_hkl()
            this_coset = None
            for p_no, partition in enumerate(partitions):
                partition_ops = [
                    change_of_basis_op(ip).as_hkl() for ip in partition
                ]
                if this_reindex_op in partition_ops:
                    this_coset = p_no
                    break
            assert this_coset is not None
            raw["coset"].append(this_coset)
            raw["reindex_op"].append(this_reindex_op)

        keys = list(raw.keys())
        from pandas import DataFrame as df
        data = df(raw)
        # major assumption: all of the coset decompositions "CO" are the same.  Not sure if a test is needed.

        # report back to rank==0 and reconcile all coset assignments
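        # comm.gather returns the full list of per-rank (DataFrame, coset decomposition) tuples on rank 0 and None on the other ranks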
        reports = self.mpi_helper.comm.gather((data, CO), root=0)
        if self.mpi_helper.rank == 0:
            from xfel.merging.application.modify.df_cosym import reconcile_cosym_reports
            REC = reconcile_cosym_reports(reports)
            results = REC.simple_merge(voting_method="consensus")

            # at this point we have the opportunity to reconcile the results with an anchor
            # recycle the data structures for anchor determination
            if self.params.modify.cosym.anchor:
                sampling_experiments_for_cosym = ExperimentList()
                sampling_reflections_for_cosym = []
                print("ANCHOR determination")
                task_a()
                ANCHOR = task_1(
                    uuid_starting=["anchor structure"],
                    mpi_helper_size=1)  # only run on the rank==0 tranch.
                self.uuid_cache = ANCHOR.uuid_cache  # reformed uuid list after n_refls filter
                ANCHOR.run()

                keyval = [("experiment", []), ("coset", [])]
                raw = OrderedDict(keyval)
                print("Anchor", "experiments:",
                      len(sampling_experiments_for_cosym))

                anchor_op = ANCHOR.cb_op_to_minimum[0].inverse() * \
                           sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[0]) * \
                           ANCHOR.cb_op_to_minimum[0]
                anchor_coset = None
                for p_no, partition in enumerate(partitions):
                    partition_ops = [
                        change_of_basis_op(ip).as_hkl() for ip in partition
                    ]
                    if anchor_op.as_hkl() in partition_ops:
                        anchor_coset = p_no
                        break
                assert anchor_coset is not None
                print("The consensus for the anchor is", anchor_op.as_hkl(),
                      " anchor coset", anchor_coset)
                raw["experiment"].append("anchor structure")
                raw["coset"].append(anchor_coset)

                for sidx in range(1, len(self.uuid_cache)):
                    raw["experiment"].append(self.uuid_cache[sidx])

                    sidx_plus = sidx

                    minimum_to_input = ANCHOR.cb_op_to_minimum[
                        sidx_plus].inverse()
                    reindex_op = minimum_to_input * \
                             sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[sidx_plus]) * \
                             ANCHOR.cb_op_to_minimum[sidx_plus]
                    this_reindex_op = reindex_op.as_hkl()
                    this_coset = None
                    for p_no, partition in enumerate(partitions):
                        partition_ops = [
                            change_of_basis_op(ip).as_hkl() for ip in partition
                        ]
                        if this_reindex_op in partition_ops:
                            this_coset = p_no
                            break
                    assert this_coset is not None
                    raw["coset"].append(this_coset)

                from pandas import DataFrame as df
                anchor_data = df(raw)
                REC.reconcile_with_anchor(results, anchor_data, anchor_op)
                # no need for return value; results dataframe is modified in place

            if self.params.modify.cosym.dataframe:
                import os
                results.to_pickle(
                    path=os.path.join(self.params.output.output_dir,
                                      self.params.modify.cosym.dataframe))
            transmitted = results
        else:
            transmitted = None
        self.mpi_helper.comm.barrier()
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        # "transmitted" holds the global coset assignments

        # subselect expt and refl on the successful coset assignments
        # output:  experiments-->result_experiments_for_cosym; reflections-->reflections (modified in place)
        result_experiments_for_cosym = ExperimentList()
        good_refls = flex.bool(len(reflections), False)
        good_expt_id = list(transmitted["experiment"])
        good_coset = list(transmitted["coset"])  # TODO: use pandas indexing here rather than Python lists
        for iexpt in range(len(experiments)):
            iexpt_id = experiments[iexpt].identifier
            keepit = iexpt_id in good_expt_id
            if keepit:
                this_coset = good_coset[good_expt_id.index(iexpt_id)]
                this_cb_op = change_of_basis_op(CO.partitions[this_coset][0])
                accepted_expt = experiments[iexpt]
                if this_coset > 0:
                    accepted_expt.crystal = MosaicCrystalSauter2014(
                        accepted_expt.crystal.change_basis(this_cb_op))
                    # need to use wrapper because of cctbx/dxtbx#5
                result_experiments_for_cosym.append(accepted_expt)
                good_refls |= reflections["exp_id"] == iexpt_id
        reflections = reflections.select(good_refls)
        self.mpi_helper.comm.barrier()
        #if self.mpi_helper.rank == 0:
        #  import pickle
        #  with open("refl.pickle","wb") as F:
        #    pickle.dump(reflections, F)
        #    pickle.dump(transmitted, F)
        #    pickle.dump([E.crystal.get_crystal_symmetry() for E in result_experiments_for_cosym],F)
        #    pickle.dump([E.identifier for E in result_experiments_for_cosym],F)
        #    pickle.dump(CO, F)

        # still have to reindex the reflection table, but try to do it efficiently
        from xfel.merging.application.modify.reindex_cosym import reindex_refl_by_coset
        reindex_refl_by_coset(
            refl=reflections,
            data=transmitted,
            symms=[
                E.crystal.get_crystal_symmetry()
                for E in result_experiments_for_cosym
            ],
            uuids=[E.identifier for E in result_experiments_for_cosym],
            co=CO,
            anomalous_flag=not self.params.merging.merge_anomalous,
            verbose=False)
        # this should have re-indexed the refls in place, no need for return value

        self.mpi_helper.comm.barrier()
        # Note: this handles the simple case of lattice ambiguity (P63 in the P6/mmm lattice group);
        # in this use case we assume all inputs and outputs are in P63.
        # More complex use cases would have to reset the space group in the crystal and recalculate
        # the ASU "miller_indices" in the reflection table.

        self.logger.log_step_time("COSYM", True)
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(result_experiments_for_cosym,
                                        reflections)
        return result_experiments_for_cosym, reflections
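The coset bookkeeping above assigns each experiment's reindexing operator to one partition of the left coset decomposition of the lattice group with respect to the space group. A hedged, self-contained sketch of that lookup with cctbx.sgtbx, using P422/P4 as a stand-in supergroup/subgroup pair (not the actual symmetry of any particular dataset):

    from cctbx import sgtbx
    from cctbx.sgtbx import cosets

    lattice_group = sgtbx.space_group_info("P 4 2 2").group()  # supergroup
    space_group = sgtbx.space_group_info("P 4").group()        # subgroup

    CO = cosets.left_decomposition(lattice_group, space_group)

    # find which coset a given reindexing operator belongs to (mirrors the partition loop above)
    reindex_op = sgtbx.change_of_basis_op("y,x,-z")            # twofold along [110], absent from P4
    this_coset = None
    for p_no, partition in enumerate(CO.partitions):
        partition_ops = [sgtbx.change_of_basis_op(ip).as_hkl() for ip in partition]
        if reindex_op.as_hkl() in partition_ops:
            this_coset = p_no
            break
    assert this_coset is not None
    print("operator", reindex_op.as_hkl(), "falls in coset", this_coset)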
Example #13
    def run(self, all_experiments, all_reflections):
        """ Load all the data using MPI """
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex

        # Both must be None, or both must be not None
        test = [all_experiments is None, all_reflections is None].count(True)
        assert test in [0, 2]
        if test == 2:
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0
        else:
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        self.logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % (starting_expts_count, starting_refls_count))

        # Generate and send a list of file paths to each worker
        if self.mpi_helper.rank == 0:
            file_list = self.get_list()
            self.logger.log(
                "Built an input list of %d json/pickle file pairs" %
                (len(file_list)))
            self.params.input.path = None  # Rank 0 has already parsed the input parameters
            per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
                                    calculate_file_load(available_rank_count = self.mpi_helper.size)
            self.logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                (len(per_rank_file_list)))
            transmitted = per_rank_file_list
        else:
            transmitted = None

        self.logger.log_step_time("BROADCAST_FILE_LIST")
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        new_file_list = (transmitted[self.mpi_helper.rank]
                         if self.mpi_helper.rank < len(transmitted) else None)
        self.logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Load the data
        self.logger.log_step_time("LOAD")
        if new_file_list is not None:
            self.logger.log("Received a list of %d json/pickle file pairs" %
                            len(new_file_list))
            for experiments_filename, reflections_filename in new_file_list:
                experiments = ExperimentListFactory.from_json_file(
                    experiments_filename, check_format=False)
                reflections = flex.reflection_table.from_file(
                    reflections_filename)
                # NOTE: slicing is used below because selection on the 'id' column no longer works (see the FIXME further down)
                reflections.sort("id")
                unique_refl_ids = set(reflections['id'])
                assert len(unique_refl_ids) == len(experiments), \
                    "refl table and experiment list should contain data on the same experiments"  # TODO: decide if this is true
                assert min(reflections["id"]) >= 0, \
                    "No -1 entries in the id column; ideally it should be the numerical index of the experiment, but beware that this is not enforced anywhere in the upstream code base"

                if 'intensity.sum.value' in reflections:
                    reflections[
                        'intensity.sum.value.unmodified'] = reflections[
                            'intensity.sum.value'] * 1
                if 'intensity.sum.variance' in reflections:
                    reflections[
                        'intensity.sum.variance.unmodified'] = reflections[
                            'intensity.sum.variance'] * 1

                for experiment_id, experiment in enumerate(experiments):
                    if experiment.identifier is None or len(
                            experiment.identifier) == 0:
                        experiment.identifier = create_experiment_identifier(
                            experiment, experiments_filename, experiment_id)

                    all_experiments.append(experiment)

                    # select reflections of the current experiment
                    # FIXME the selection was broken for me, it raised
                    #    RuntimeError: boost::bad_get: failed value get using boost::get
                    #refls = reflections.select(reflections['id'] == experiment_id)
                    # NOTE: this is a hack due to the broken experiment_id selection above
                    exp_id_pos = np.where(
                        reflections['id'] == experiment_id)[0]
                    assert exp_id_pos.size, "no refls in this experiment"  # NOTE: maybe we can relax this assertion?
                    refls = reflections[exp_id_pos[0]:exp_id_pos[-1] + 1]

                    #FIXME: how will this work if reading in multiple composite mode experiment jsons?
                    # Reflection experiment 'id' is supposed to be unique within this rank; 'exp_id' (i.e. experiment identifier) is supposed to be unique globally
                    refls['exp_id'] = flex.std_string(len(refls),
                                                      experiment.identifier)

                    new_id = 0
                    if len(all_reflections) > 0:
                        new_id = max(all_reflections['id']) + 1

                    # FIXME: it is hard to interpret a function call that returns a mutable property
                    eid = refls.experiment_identifiers()
                    for k in eid.keys():
                        del eid[k]
                    eid[new_id] = experiment.identifier
                    refls['id'] = flex.int(len(refls), new_id)
                    all_reflections.extend(refls)
        else:
            self.logger.log("Received a list of 0 json/pickle file pairs")
        self.logger.log_step_time("LOAD", True)

        self.logger.log('Read %d experiments consisting of %d reflections' %
                        (len(all_experiments) - starting_expts_count,
                         len(all_reflections) - starting_refls_count))
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        from xfel.merging.application.reflection_table_utils import reflection_table_utils
        all_reflections = reflection_table_utils.prune_reflection_table_keys(
            reflections=all_reflections,
            keys_to_keep=[
                'intensity.sum.value', 'intensity.sum.variance',
                'miller_index', 'miller_index_asymmetric', 'exp_id', 's1',
                'intensity.sum.value.unmodified',
                'intensity.sum.variance.unmodified'
            ])
        self.logger.log("Pruned reflection table")
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(all_experiments, all_reflections)

        return all_experiments, all_reflections
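The slicing workaround in the last example relies on the reflection table having been sorted by 'id', so all rows of one experiment occupy a contiguous block whose bounds np.where can locate. A tiny standalone illustration of that assumption with plain numpy:

    import numpy as np

    ids = np.array([2, 0, 1, 0, 2, 1])        # unsorted experiment ids
    order = np.argsort(ids, kind="stable")    # analogue of reflections.sort("id")
    ids_sorted = ids[order]

    exp_id_pos = np.where(ids_sorted == 1)[0]
    block = ids_sorted[exp_id_pos[0]:exp_id_pos[-1] + 1]
    assert (block == 1).all()                 # contiguous only because the array is sorted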