def distribute_reflections_over_hkl_chunks(self, reflections):
        '''Distribute reflections, according to their HKLs, over pre-set HKL chunks'''
        total_reflection_count = reflections.size()
        total_distributed_reflection_count = 0

        if total_reflection_count > 0:
            # Set up two parallel lists to pass to the C++ extension: HKLs and
            # chunk ids, i.e. a lookup table mapping HKL to chunk id
            hkl_list = flex.miller_index()
            chunk_id_list = flex.int()

            for i, chunk_hkls in enumerate(self.hkl_split_set):
                for hkl in chunk_hkls:
                    hkl_list.append((int(hkl[0]), int(hkl[1]), int(hkl[2])))
                    chunk_id_list.append(i)

            # distribute reflections over hkl chunks, using a C++ extension
            from xfel.merging import get_hkl_chunks_cpp
            get_hkl_chunks_cpp(reflections, hkl_list, chunk_id_list,
                               self.hkl_chunks)
            for chunk in self.hkl_chunks:
                total_distributed_reflection_count += len(chunk)

        self.logger.log(
            "Distributed %d out of %d reflections" %
            (total_distributed_reflection_count, total_reflection_count))
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        reflections.clear()
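# A minimal pure-Python sketch of the lookup that get_hkl_chunks_cpp is assumed
# to perform: build a hash table from HKL to chunk id, then route each
# reflection to its chunk. Illustrative only; the real C++ extension may differ
# in detail.
def get_hkl_chunks_python(reflections, hkl_list, chunk_id_list, hkl_chunks):
    from dials.array_family import flex
    chunk_id_by_hkl = dict(zip(hkl_list, chunk_id_list))  # HKL -> chunk id
    selections = [flex.bool(reflections.size(), False) for _ in hkl_chunks]
    for i, hkl in enumerate(reflections['miller_index_asymmetric']):
        chunk_id = chunk_id_by_hkl.get(hkl)
        if chunk_id is not None:
            selections[chunk_id][i] = True
    for chunk, selection in zip(hkl_chunks, selections):
        chunk.extend(reflections.select(selection))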
Example no. 2
    def prune_reflection_table_keys(self, reflections):
        from xfel.merging.application.reflection_table_utils import reflection_table_utils
        reflections = reflection_table_utils.prune_reflection_table_keys(
            reflections=reflections,
            keys_to_keep=['intensity.sum.value', 'intensity.sum.variance',
                          'miller_index', 'miller_index_asymmetric', 'exp_id', 's1',
                          'intensity.sum.value.unmodified',
                          'intensity.sum.variance.unmodified'])
        self.logger.log("Pruned reflection table")
        self.logger.log("Memory usage: %d MB" % get_memory_usage())
        return reflections
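# A minimal sketch (assuming a dict-like dials reflection_table) of what a
# key-pruning helper such as prune_reflection_table_keys is expected to do:
# drop every column not explicitly kept, freeing memory before MPI transfers.
def prune_keys_sketch(reflections, keys_to_keep):
    for key in list(reflections.keys()):
        if key not in keys_to_keep:
            del reflections[key]
    return reflections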
    def get_reflections_from_alltoall_sliced(self, number_of_slices):
        '''Split each hkl chunk into N slices. This is needed to address the MPI alltoall memory problem'''

        # the total reflection table, which this rank will receive after all slices of alltoall
        result_reflections = self.distribute_reflection_table()

        # if self.hkl_chunks is [A,B,C,...], this list will be
        # [[A1,A2,...,An], [B1,B2,...,Bn], [C1,C2,...,Cn], ...],
        # where n is the number of chunk slices
        list_of_sliced_hkl_chunks = []
        for chunk in self.hkl_chunks:
            hkl_chunk_slices = list(
                reflection_table_utils.get_next_reflection_table_slice(
                    chunk, number_of_slices, self.distribute_reflection_table))
            list_of_sliced_hkl_chunks.append(hkl_chunk_slices)

        self.logger.log("Ready for all-to-all...")
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        for j in range(number_of_slices):
            hkl_chunks_for_alltoall = [
                sliced_chunk[j] for sliced_chunk in list_of_sliced_hkl_chunks
            ]  # [Aj, Bj, Cj, ...]

            self.logger.log_step_time("ALL-TO-ALL")
            self.logger.log("Executing MPI all-to-all...")
            self.logger.log("Memory usage: %d MB" % get_memory_usage())

            received_hkl_chunks = comm.alltoall(hkl_chunks_for_alltoall)

            self.logger.log("After all-to-all received %d hkl chunks" %
                            len(received_hkl_chunks))
            self.logger.log_step_time("ALL-TO-ALL", True)

            self.logger.log_step_time("CONSOLIDATE")
            self.logger.log("Consolidating reflection tables...")

            for chunk in received_hkl_chunks:
                result_reflections.extend(chunk)

            self.logger.log_step_time("CONSOLIDATE", True)

        return result_reflections
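# A compact mpi4py sketch of the sliced all-to-all pattern above, with plain
# Python lists standing in for reflection tables (assumes len(payload_per_rank)
# == comm.size). Sending only slice j of every destination's payload per
# collective call bounds the size of any single alltoall buffer.
def sliced_alltoall(comm, payload_per_rank, number_of_slices):
    received = []
    for j in range(number_of_slices):
        slices = [payload[j::number_of_slices] for payload in payload_per_rank]
        for chunk in comm.alltoall(slices):
            received.extend(chunk)
    return received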
Example no. 4
    def prune_reflection_table_keys(self, reflections):
        from xfel.merging.application.reflection_table_utils import reflection_table_utils
        reflections = reflection_table_utils.prune_reflection_table_keys(
            reflections=reflections,
            keys_to_keep=['intensity.sum.value', 'intensity.sum.variance',
                          'miller_index', 'miller_index_asymmetric', 'exp_id', 's1',
                          'intensity.sum.value.unmodified',
                          'intensity.sum.variance.unmodified',
                          'kapton_absorption_correction', 'flags'],
            keys_to_ignore=self.params.input.persistent_refl_cols)
        self.logger.log("Pruned reflection table")
        self.logger.log("Memory usage: %d MB" % get_memory_usage())
        return reflections
Example no. 5
    def _mem_usage(self):
        memMB = get_memory_usage()
        import socket
        host = socket.gethostname()
        print("Rank 0 reporting memory usage: %f GB on Rank 0 node %s" %
              (memMB / 1e3, host))
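# A hedged extension of the same idea: gather every rank's memory footprint to
# rank 0 with mpi4py and report the peak (comm and get_memory_usage are assumed
# to be available from the surrounding application).
def report_peak_memory(comm):
    mem_by_rank = comm.gather(get_memory_usage(), root=0)
    if comm.rank == 0:
        print("Peak per-rank memory: %.1f MB over %d ranks" %
              (max(mem_by_rank), len(mem_by_rank)))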
Example no. 6
    def run(self, all_experiments, all_reflections):
        """ Load all the data using MPI """
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex

        # Both must be none or not none
        test = [all_experiments is None, all_reflections is None].count(True)
        assert test in [0, 2]
        if test == 2:
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0
        else:
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        self.logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % (starting_expts_count, starting_refls_count))

        # Generate and send a list of file paths to each worker
        if self.mpi_helper.rank == 0:
            file_list = self.get_list()
            self.logger.log(
                "Built an input list of %d json/pickle file pairs" %
                (len(file_list)))
            self.params.input.path = None  # Rank 0 has already parsed the input parameters
            per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
                                    calculate_file_load(available_rank_count = self.mpi_helper.size)
            self.logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                (len(per_rank_file_list)))
            transmitted = per_rank_file_list
        else:
            transmitted = None

        self.logger.log_step_time("BROADCAST_FILE_LIST")
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        new_file_list = transmitted[self.mpi_helper.rank] \
            if self.mpi_helper.rank < len(transmitted) else None
        self.logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Load the data
        self.logger.log_step_time("LOAD")
        if new_file_list is not None:
            self.logger.log("Received a list of %d json/pickle file pairs" %
                            len(new_file_list))
            for experiments_filename, reflections_filename in new_file_list:
                self.logger.log("Reading %s %s" %
                                (experiments_filename, reflections_filename))
                experiments = ExperimentListFactory.from_json_file(
                    experiments_filename, check_format=False)
                reflections = flex.reflection_table.from_file(
                    reflections_filename)
                self.logger.log("Data read, prepping")

                # multiplying by 1 forces a copy of the column, preserving the
                # original values before downstream corrections modify them
                if 'intensity.sum.value' in reflections:
                    reflections['intensity.sum.value.unmodified'] = \
                        reflections['intensity.sum.value'] * 1
                if 'intensity.sum.variance' in reflections:
                    reflections['intensity.sum.variance.unmodified'] = \
                        reflections['intensity.sum.variance'] * 1

                new_ids = flex.int(len(reflections), -1)
                new_identifiers = flex.std_string(len(reflections))
                eid = reflections.experiment_identifiers()
                for k in eid.keys():
                    del eid[k]
                for experiment_id, experiment in enumerate(experiments):
                    # select reflections of the current experiment
                    refls_sel = reflections['id'] == experiment_id

                    if refls_sel.count(True) == 0: continue

                    if experiment.identifier is None or len(
                            experiment.identifier) == 0:
                        experiment.identifier = create_experiment_identifier(
                            experiment, experiments_filename, experiment_id)

                    if not self.params.input.keep_imagesets:
                        experiment.imageset = None
                    all_experiments.append(experiment)

                    # Reflection experiment 'id' is unique within this rank; 'exp_id' (i.e. experiment identifier) is unique globally
                    new_identifiers.set_selected(refls_sel,
                                                 experiment.identifier)

                    new_id = len(all_experiments) - 1
                    eid[new_id] = experiment.identifier
                    new_ids.set_selected(refls_sel, new_id)
                assert (new_ids < 0).count(True) == 0, "Not all reflections accounted for"
                reflections['id'] = new_ids
                reflections['exp_id'] = new_identifiers
                all_reflections.extend(reflections)
        else:
            self.logger.log("Received a list of 0 json/pickle file pairs")
        self.logger.log_step_time("LOAD", True)

        self.logger.log('Read %d experiments consisting of %d reflections' %
                        (len(all_experiments) - starting_expts_count,
                         len(all_reflections) - starting_refls_count))
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        all_reflections = self.prune_reflection_table_keys(all_reflections)

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(all_experiments, all_reflections)
        return all_experiments, all_reflections
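# A condensed sketch of the id-remapping loop above: reflections arrive with an
# 'id' column that is local to one input file; each kept experiment is appended
# to the rank-wide list, the ids are rewritten to the global position, and the
# experiment_identifiers() map is updated to match. Illustrative only.
def remap_reflection_ids(reflections, experiments, all_experiments):
    from dials.array_family import flex
    new_ids = flex.int(len(reflections), -1)
    eid = reflections.experiment_identifiers()
    for local_id, experiment in enumerate(experiments):
        refls_sel = reflections['id'] == local_id
        if refls_sel.count(True) == 0:
            continue
        all_experiments.append(experiment)
        global_id = len(all_experiments) - 1
        eid[global_id] = experiment.identifier
        new_ids.set_selected(refls_sel, global_id)
    reflections['id'] = new_ids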
Example no. 7
def tst_one(i_exp, spectra, Fmerge, gpu_channels_singleton, rank, params):
    import sys
    from time import time  # used by the timing blocks below
    import numpy as np
    from simtbx.nanoBragg import utils
    from dxtbx.model.experiment_list import ExperimentListFactory

    print("Experiment %d" % i_exp, flush=True)
    sys.stdout.flush()

    outfile = "boop_%d.hdf5" % i_exp
    from LS49.adse13_187.case_data import retrieve_from_repo
    experiment_file = retrieve_from_repo(i_exp)
    # Not used # refl_file = "/global/cfs/cdirs/m3562/der/run795/top_%d.refl" % i_exp
    cuda = True  # False  # whether to use cuda
    omp = False
    ngpu_on_node = 1 # 8  # number of available GPUs
    mosaic_spread = 0.07  # degrees
    mosaic_spread_samples = params.mosaic_spread_samples # number of mosaic blocks sampling mosaicity
    Ncells_abc = 30, 30, 10  # medians from best stage1
    ev_res = 1.5  # resolution of the downsample spectrum
    total_flux = 1e12  # total flux across channels
    beamsize_mm = 0.000886226925452758  # sqrt of beam focal area
    spot_scale = 500. # 5.16324  # median from best stage1
    plot_spec = False  # plot the downsample spectra before simulating
    oversample = 1  # oversample factor; 1, 2, or 3 is probably enough
    panel_list = None  # integer list of panels, useful for debugging
    rois_only = False  # only set True if you are running OpenMP or CPU-only (i.e. not for GPU)
    include_background = params.include_background   # default is to add water background 100 mm thick
    verbose = 0  # leave as 0, unless debugging
    flat = True  # enforce that the camera has 0 thickness
    #<><><><><><><><>
    # XXX new code
    El = ExperimentListFactory.from_json_file(experiment_file,
                                              check_format=True)
    exper = El[0]

    crystal = exper.crystal
    detector = exper.detector
    if flat:
        from dxtbx_model_ext import SimplePxMmStrategy
        for panel in detector:
            panel.set_px_mm_strategy(SimplePxMmStrategy())
            panel.set_mu(0)
            panel.set_thickness(0)

    beam = exper.beam

    # XXX new code
    spec = exper.imageset.get_spectrum(0)
    energies_raw, weights_raw = spec.get_energies_eV().as_numpy_array(), \
                                spec.get_weights().as_numpy_array()
    energies, weights = utils.downsample_spectrum(energies_raw, weights_raw, method=1, total_flux=total_flux,
                                                  ev_width=ev_res)

    if flat:
        assert detector[0].get_thickness() == 0

    if panel_list is None:
        panel_list = list(range(len(detector)))

    pids_for_rank = panel_list
    device_Id = 0
    if gpu_channels_singleton is not None:
      device_Id = gpu_channels_singleton.get_deviceID()

    print("Rank %d will use device %d" % (rank, device_Id))
    show_params = False
    time_panels = (rank == 0)

    mn_energy = (energies*weights).sum() / weights.sum()
    mn_wave = utils.ENERGY_CONV / mn_energy

    if params.use_exascale_api:
      BEG = time()
      print(gpu_channels_singleton.get_deviceID(), "device")
      Famp_is_uninitialized = (gpu_channels_singleton.get_nchannels() == 0)
      if Famp_is_uninitialized:
        F_P1 = Fmerge.expand_to_p1()
        for x in range(1):  # in this scenario, amplitudes are independent of lambda
          gpu_channels_singleton.structure_factors_to_GPU_direct(
              x, F_P1.indices(), F_P1.data())
      assert gpu_channels_singleton.get_nchannels() == 1

      JF16M_numpy_array, TIME_BG, TIME_BRAGG, _ = multipanel_sim(
        CRYSTAL=crystal, DETECTOR=detector, BEAM=beam,
        Famp = gpu_channels_singleton,
        energies=list(energies), fluxes=list(weights),
        background_wavelengths=[mn_wave], background_wavelength_weights=[1],
        background_total_flux=total_flux,background_sample_thick_mm=0.5,
        cuda=True,
        oversample=oversample, Ncells_abc=Ncells_abc,
        mos_dom=mosaic_spread_samples, mos_spread=mosaic_spread,
        mosaic_method=params.mosaic_method,
        beamsize_mm=beamsize_mm,show_params=show_params,
        time_panels=time_panels, verbose=verbose,
        spot_scale_override=spot_scale,
        include_background=include_background,
        mask_file=params.mask_file)
      TIME_EXA = time()-BEG
      print ("Exascale time",TIME_EXA)
      if params.write_experimental_data:
        data = exper.imageset.get_raw_data(0)

      tsave = time()
      img_sh = JF16M_numpy_array.shape
      assert img_sh == (256,254,254)
      num_output_images = 1 + int(params.write_experimental_data)
      print("Saving exascale output data of shape", img_sh)
      beam_dict = beam.to_dict()
      det_dict = detector.to_dict()
      try:
        beam_dict.pop("spectrum_energies")
        beam_dict.pop("spectrum_weights")
      except Exception: pass
      # XXX no longer have two separate files
      if params.write_output:
        with utils.H5AttributeGeomWriter(
            "exap_%d.hdf5" % i_exp,
            image_shape=img_sh, num_images=num_output_images,
            detector=det_dict, beam=beam_dict,
            detector_and_beam_are_dicts=True) as writer:
          writer.add_image(JF16M_numpy_array)

          if params.write_experimental_data:
            data = [data[pid].as_numpy_array() for pid in panel_list]
            writer.add_image(data)

        tsave = time() - tsave
        print("Saved output to file %s. Saving took %.4f sec" % ("exap_%d.hdf5" % i_exp, tsave))

    BEG2 = time()
    #optional background
    TIME_BG2 = time()
    backgrounds = {pid: None for pid in panel_list}
    if include_background:
        backgrounds = {pid: utils.sim_background( # default is for water
                detector, beam, wavelengths=[mn_wave], wavelength_weights=[1],
                total_flux=total_flux, Fbg_vs_stol=water,
                pidx=pid, beam_size_mm=beamsize_mm, sample_thick_mm=0.5)
            for pid in pids_for_rank}
    TIME_BG2 = time()-TIME_BG2

    TIME_BRAGG2 = time()
    pid_and_pdata = utils.flexBeam_sim_colors(
      CRYSTAL=crystal, DETECTOR=detector, BEAM=beam,
      energies=list(energies), fluxes=list(weights), Famp=Fmerge,
      pids=pids_for_rank, cuda=cuda, device_Id=device_Id,
      oversample=oversample, Ncells_abc=Ncells_abc, verbose=verbose,
      time_panels=time_panels, show_params=show_params, spot_scale_override=spot_scale,
      mos_dom=mosaic_spread_samples, mos_spread=mosaic_spread, beamsize_mm=beamsize_mm,
      background_raw_pixels=backgrounds, include_noise=False, rois_perpanel=None)
    TIME_BRAGG2 = time()-TIME_BRAGG2
    pid_and_pdata = sorted(pid_and_pdata, key=lambda x: x[0])
    _, pdata = zip(*pid_and_pdata)
    TIME_VINTAGE = time()-BEG2

    print("\n<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>")
    print("\tBreakdown:")
    if params.use_exascale_api:
        print("\t\tExascale: time for bkgrd sim: %.4fs; Bragg sim: %.4fs; total: %.4fs" % (TIME_BG, TIME_BRAGG, TIME_EXA))
    print("\t\tVintage:  time for bkgrd sim: %.4fs; Bragg sim: %.4fs; total: %.4fs" % (TIME_BG2, TIME_BRAGG2, TIME_VINTAGE))
    print("<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>\n")

    if params.test_pixel_congruency and params.use_exascale_api:
      abs_diff = np.abs(np.array(pdata) - JF16M_numpy_array).max()
      assert np.allclose(pdata, JF16M_numpy_array), "max per-pixel difference: %f photons"%abs_diff
      print("pixel congruency: OK!")

    # pdata is a list of 256 2D numpy arrays, now.

    if len(panel_list) != len(detector):
        print("Cant save partial detector image, exiting..")
        exit()
        #from dxtbx.model import Detector
        #new_det = Detector()
        #for pid in panel_list:
        #    new_det.add_panel(detector[pid])
        #detector = new_det
    if params.write_experimental_data:
        data = exper.imageset.get_raw_data(0)

    tsave = time()
    pdata = np.array(pdata) # now pdata is a numpy array of shape 256,254,254
    img_sh = pdata.shape
    num_output_images = 3 + int(params.write_experimental_data)
    print("BOOPZ: Rank=%d ; i_exp=%d, RAM usage=%f" % (rank, i_exp,get_memory_usage()/1e6 ))
    beam_dict = beam.to_dict()
    det_dict = detector.to_dict()
    try:
      beam_dict.pop("spectrum_energies")
      beam_dict.pop("spectrum_weights")
    except Exception: pass
    if params.write_output:
      print("Saving output data of shape", img_sh)
      with utils.H5AttributeGeomWriter(outfile, image_shape=img_sh, num_images=num_output_images,
                                detector=det_dict, beam=beam_dict,
                                detector_and_beam_are_dicts=True) as writer:
        writer.add_image(JF16M_numpy_array/pdata)
        writer.add_image(JF16M_numpy_array)
        writer.add_image(pdata)

        if params.write_experimental_data:
            data = [data[pid].as_numpy_array() for pid in panel_list]
            writer.add_image(data)

      tsave = time() - tsave
      print("Saved output to file %s. Saving took %.4f sec" % (outfile, tsave, ))
Example no. 8
File: cosym.py Project: dials/cctbx
    def run(self, input_experiments, input_reflections):
        from collections import OrderedDict
        if self.mpi_helper.rank == 0:
            print("Starting cosym worker")
            #Overall = Profiler("Cosym total time")

        #  Evenly distribute all experiments from mpi_helper ranks
        # report from all ranks on experiment count
        reports = self.mpi_helper.comm.gather(len(input_experiments), root=0)
        if self.mpi_helper.rank == 0:
            from xfel.merging.application.modify.token_passing_left_right import construct_src_to_dst_plan
            plan = construct_src_to_dst_plan(
                flex.int(reports), self.params.modify.cosym.tranch_size,
                self.mpi_helper.comm)
        else:
            plan = 0
        plan = self.mpi_helper.comm.bcast(plan, root=0)
        dst_offset = 1 if self.mpi_helper.size > 1 else 0  # decision whether to reserve rank 0 for parallel anchor determination
        # FIXME XXX probably need to look at plan size to decide dst_offset or not
        from xfel.merging.application.modify.token_passing_left_right import apply_all_to_all
        tokens = apply_all_to_all(plan=plan,
                                  dst_offset=dst_offset,
                                  value=(input_experiments, input_reflections),
                                  comm=self.mpi_helper.comm)

        if self.params.modify.cosym.anchor:
            if self.mpi_helper.rank == 0:
                MIN_ANCHOR = 20
                from xfel.merging.application.modify.token_passing_left_right import construct_anchor_src_to_dst_plan
                anchor_plan = construct_anchor_src_to_dst_plan(
                    MIN_ANCHOR, flex.int(reports),
                    self.params.modify.cosym.tranch_size, self.mpi_helper.comm)
            else:
                anchor_plan = 0
            anchor_plan = self.mpi_helper.comm.bcast(anchor_plan, root=0)
        self.logger.log_step_time("COSYM")

        if self.params.modify.cosym.plot.interactive:
            self.params.modify.cosym.plot.filename = None

        has_tokens = len(tokens) > 0
        all_has_tokens = self.mpi_helper.comm.allgather(has_tokens)
        ranks_with_tokens = [
            i for (i, val) in enumerate(all_has_tokens) if val
        ]
        ranks_to_plot = ranks_with_tokens[:self.params.modify.cosym.plot.n_max]
        do_plot = (self.params.modify.cosym.plot.do_plot
                   and self.mpi_helper.rank in ranks_to_plot)

        # Only select ranks that have been assigned tranch data, for mutual coset determination
        if len(tokens) > 0:
            # because cosym has a problem with hashed identifiers, use simple experiment identifiers
            sampling_experiments_for_cosym = ExperimentList()
            sampling_reflections_for_cosym = []  # a list of flex.reflection_table
            COSYM = self.task_c(self.params,
                                self.mpi_helper,
                                self.logger,
                                tokens,
                                sampling_experiments_for_cosym,
                                sampling_reflections_for_cosym,
                                communicator_size=self.mpi_helper.size,
                                do_plot=do_plot)
            self.uuid_cache = COSYM.uuid_cache  # reformed uuid list after n_refls filter

            rank_N_refl = flex.double([r.size() for r in COSYM.reflections])
            message = """Task 1. Prepare the data for cosym
    change_of_basis_ops_to_minimum_cell
    eliminate_sys_absent
    transform models into Miller arrays, putting data in primitive triclinic reduced cell
    There are %d experiments with %d reflections, averaging %.1f reflections/experiment""" % (
                len(COSYM.experiments), flex.sum(rank_N_refl),
                flex.mean(rank_N_refl))
            self.logger.log(message)
            if self.mpi_helper.rank == 1:
                print(message)  #; P = Timer("COSYM.run")
            COSYM.run()
            #if self.mpi_helper.rank == 1: del P

            keyval = [("experiment", []), ("reindex_op", []), ("coset", [])]
            raw = OrderedDict(keyval)

            if self.mpi_helper.rank == 0:
                print("Rank", self.mpi_helper.rank, "experiments:",
                      len(sampling_experiments_for_cosym))

            for sidx in range(len(self.uuid_cache)):
                raw["experiment"].append(self.uuid_cache[sidx])

                sidx_plus = sidx

                try:
                    minimum_to_input = COSYM.cb_op_to_minimum[
                        sidx_plus].inverse()
                except Exception as e:
                    print("raising", e, sidx_plus, len(COSYM.cb_op_to_minimum))
                    raise e

                reindex_op = minimum_to_input * \
                             sgtbx.change_of_basis_op(COSYM.cosym_analysis.reindexing_ops[sidx_plus]) * \
                             COSYM.cb_op_to_minimum[sidx_plus]

                # Keep this block even though not currently used; need for future assertions:
                LG = COSYM.cosym_analysis.target._lattice_group
                LGINP = LG.change_basis(
                    COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(
                        minimum_to_input)
                SG = COSYM.cosym_analysis.input_space_group
                SGINP = SG.change_basis(
                    COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(
                        minimum_to_input)
                CO = sgtbx.cosets.left_decomposition(LGINP, SGINP)
                partitions = CO.partitions
                this_reindex_op = reindex_op.as_hkl()
                this_coset = None
                for p_no, partition in enumerate(partitions):
                    partition_ops = [
                        change_of_basis_op(ip).as_hkl() for ip in partition
                    ]
                    if this_reindex_op in partition_ops:
                        this_coset = p_no
                        break
                assert this_coset is not None
                raw["coset"].append(this_coset)
                raw["reindex_op"].append(this_reindex_op)

            keys = list(raw.keys())
            from pandas import DataFrame as df
            data = df(raw)
            # major assumption is that all the coset decompositions "CO" are the same.  NOT sure if a test is needed.
            reports = self.mpi_helper.comm.gather((data, CO), root=0)
        else:
            reports = self.mpi_helper.comm.gather(None, root=0)
        if self.mpi_helper.rank == 0:
            # report back to rank==0 and reconcile all coset assignments
            while None in reports:
                reports.remove(None)
            # global CO: again, assuming here they are all the same XXX
            global_coset_decomposition = reports[0][1]
        else:
            global_coset_decomposition = 0
        global_coset_decomposition = self.mpi_helper.comm.bcast(
            global_coset_decomposition, root=0)
        partitions = global_coset_decomposition.partitions
        self.mpi_helper.comm.barrier()
        # end of distributed embedding

        if self.params.modify.cosym.anchor:
            anchor_tokens = apply_all_to_all(plan=anchor_plan,
                                             dst_offset=0,
                                             value=(input_experiments,
                                                    input_reflections),
                                             comm=self.mpi_helper.comm)

        if self.mpi_helper.rank == 0:
            from xfel.merging.application.modify.df_cosym import reconcile_cosym_reports
            REC = reconcile_cosym_reports(reports)
            results = REC.composite_tranch_merge(voting_method="consensus")

            # at this point we have the opportunity to reconcile the results with an anchor
            # recycle the data structures for anchor determination
            if self.params.modify.cosym.anchor:
                sampling_experiments_for_cosym, sampling_reflections_for_cosym = self.task_a(
                    self.params)
                ANCHOR = self.task_c(
                    self.params,
                    self.mpi_helper,
                    self.logger,
                    anchor_tokens,
                    sampling_experiments_for_cosym,
                    sampling_reflections_for_cosym,
                    uuid_starting=["anchor structure"],
                    communicator_size=1)  # only run on the rank==0 tranch.
                self.uuid_cache = ANCHOR.uuid_cache  # reformed uuid list after n_refls filter
                #P = Timer("ANCHOR.run")
                # Future redesign XXX FIXME: do this in rank 0 in parallel with distributed composite tranches
                ANCHOR.run()
                #del P

                keyval = [("experiment", []), ("coset", [])]
                raw = OrderedDict(keyval)
                print("Anchor", "experiments:",
                      len(sampling_experiments_for_cosym))

                anchor_op = ANCHOR.cb_op_to_minimum[0].inverse() * \
                           sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[0]) * \
                           ANCHOR.cb_op_to_minimum[0]
                anchor_coset = None
                for p_no, partition in enumerate(partitions):
                    partition_ops = [
                        change_of_basis_op(ip).as_hkl() for ip in partition
                    ]
                    if anchor_op.as_hkl() in partition_ops:
                        anchor_coset = p_no
                        break
                assert anchor_coset is not None
                print("The consensus for the anchor is", anchor_op.as_hkl(),
                      " anchor coset", anchor_coset)

                raw["experiment"].append("anchor structure")
                raw["coset"].append(anchor_coset)
                for sidx in range(1, len(self.uuid_cache)):
                    raw["experiment"].append(self.uuid_cache[sidx])

                    sidx_plus = sidx

                    minimum_to_input = ANCHOR.cb_op_to_minimum[
                        sidx_plus].inverse()
                    reindex_op = minimum_to_input * \
                             sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[sidx_plus]) * \
                             ANCHOR.cb_op_to_minimum[sidx_plus]
                    this_reindex_op = reindex_op.as_hkl()
                    this_coset = None
                    for p_no, partition in enumerate(partitions):
                        partition_ops = [
                            change_of_basis_op(ip).as_hkl() for ip in partition
                        ]
                        if this_reindex_op in partition_ops:
                            this_coset = p_no
                            break
                    assert this_coset is not None
                    raw["coset"].append(this_coset)

                from pandas import DataFrame as df
                anchor_data = df(raw)
                REC.reconcile_with_anchor(results, anchor_data, anchor_op)
                # no need for return value; results dataframe is modified in place

            if self.params.modify.cosym.dataframe:
                import os
                results.to_pickle(
                    path=os.path.join(self.params.output.output_dir,
                                      self.params.modify.cosym.dataframe))
            transmitted = results
        else:
            transmitted = 0
        self.mpi_helper.comm.barrier()
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        # "transmitted" holds the global coset assignments

        #subselect expt and refl on the successful coset assignments
        # output:  experiments-->result_experiments_for_cosym; reflections-->reflections (modified in place)
        result_experiments_for_cosym = ExperimentList()
        good_refls = flex.bool(len(input_reflections), False)
        good_expt_id = list(transmitted["experiment"])
        good_coset = list(transmitted["coset"])  # would like to understand how to use pandas rather than Python lists
        for iexpt in range(len(input_experiments)):
            iexpt_id = input_experiments[iexpt].identifier
            keepit = iexpt_id in good_expt_id
            if keepit:
                this_coset = good_coset[good_expt_id.index(iexpt_id)]
                this_cb_op = change_of_basis_op(
                    global_coset_decomposition.partitions[this_coset][0])
                accepted_expt = input_experiments[iexpt]
                if this_coset > 0:
                    accepted_expt.crystal = MosaicCrystalSauter2014(
                        accepted_expt.crystal.change_basis(this_cb_op))
                    # need to use wrapper because of cctbx/dxtbx#5
                result_experiments_for_cosym.append(accepted_expt)
                good_refls |= input_reflections["exp_id"] == iexpt_id
        selected_reflections = input_reflections.select(good_refls)  # XXX is this in place (double check)
        self.mpi_helper.comm.barrier()

        # still have to reindex the reflection table, but try to do it efficiently
        from xfel.merging.application.modify.reindex_cosym import reindex_refl_by_coset
        if (len(result_experiments_for_cosym) > 0):
            reindex_refl_by_coset(
                refl=selected_reflections,
                data=transmitted,
                symms=[
                    E.crystal.get_crystal_symmetry()
                    for E in result_experiments_for_cosym
                ],
                uuids=[E.identifier for E in result_experiments_for_cosym],
                co=global_coset_decomposition,
                anomalous_flag=self.params.merging.merge_anomalous == False,
                verbose=False)
        # this should have re-indexed the refls in place, no need for return value

        self.mpi_helper.comm.barrier()
        # Note: this handles the simple case of lattice ambiguity (P63 in P/mmm lattice group);
        # in this use case we assume all inputs and outputs are in P63.
        # More complex use cases would have to reset the space group in the crystal and recalculate
        # the ASU miller indices in the reflections table.

        self.logger.log_step_time("COSYM", True)
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(result_experiments_for_cosym,
                                        selected_reflections)
        return result_experiments_for_cosym, selected_reflections
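# The partition search above appears three times in this worker; a small
# hypothetical helper capturing the pattern would be:
def find_coset(reindex_op, partitions):
    """Return the index of the coset partition containing reindex_op, else None."""
    from cctbx import sgtbx
    target = reindex_op.as_hkl()
    for p_no, partition in enumerate(partitions):
        if target in [sgtbx.change_of_basis_op(ip).as_hkl() for ip in partition]:
            return p_no
    return None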
Example no. 9
    def run(self, experiments, reflections):

        self.logger.log_step_time("POLARIZATION_CORRECTION")

        result = flex.reflection_table()

        for experiment in experiments:
            refls = reflections.select(
                reflections['exp_id'] == experiment.identifier)
            if len(refls) == 0: continue
            beam = experiment.beam
            # Remove the need for pixel size within cxi.merge.  Allows multipanel detector with dissimilar panels.
            # Relies on new frame extractor code called by dials.stills_process that writes s0, s1 and polarization normal
            # vectors all to the integration pickle.  Future path (i.e. this code): use dials json and reflection file.
            s0_vec = matrix.col(beam.get_s0()).normalize()
            s0_polar_norm = beam.get_polarization_normal()
            s1_vec = refls['s1']
            Ns1 = len(s1_vec)
            # project the s1_vector onto the plane normal to s0.  Get result by subtracting the
            # projection of s1 onto s0, which is (s1.dot.s0_norm)s0_norm
            s0_norm = flex.vec3_double(Ns1, s0_vec)
            s1_proj = (s1_vec.dot(s0_norm)) * s0_norm
            s1_in_normal_plane = s1_vec - s1_proj
            # Now want the polar angle between the projected s1 and the polarization normal
            s0_polar_norms = flex.vec3_double(Ns1, s0_polar_norm)
            dotprod = (s1_in_normal_plane.dot(s0_polar_norms))
            costheta = dotprod / (s1_in_normal_plane.norms())
            theta = flex.acos(costheta)
            cos_two_polar_angle = flex.cos(2.0 * theta)
            # gives same as old answer to ~1% but not exact.  Not sure why, should not matter.

            tt_vec = experiment.crystal.get_unit_cell().two_theta(
                miller_indices=refls['miller_index'],
                wavelength=beam.get_wavelength())
            cos_tt_vec = flex.cos(tt_vec)
            sin_tt_vec = flex.sin(tt_vec)
            cos_sq_tt_vec = cos_tt_vec * cos_tt_vec
            sin_sq_tt_vec = sin_tt_vec * sin_tt_vec
            P_nought_vec = 0.5 * (1. + cos_sq_tt_vec)

            F_prime = -1.0  # Hard-coded value defines the incident polarization axis
            P_prime = 0.5 * F_prime * cos_two_polar_angle * sin_sq_tt_vec

            # added as a diagnostic
            #prange=P_nought_vec - P_prime
            #other_F_prime = 1.0
            #otherP_prime = 0.5 * other_F_prime * cos_two_polar_angle * sin_sq_tt_vec
            #otherprange=P_nought_vec - otherP_prime
            #diff2 = flex.abs(prange - otherprange)
            #print >> out, "mean diff is",flex.mean(diff2), "range",flex.min(diff2), flex.max(diff2)
            # done

            correction = 1 / (P_nought_vec - P_prime)
            refls['intensity.sum.value'] = refls['intensity.sum.value'] * correction
            refls['intensity.sum.variance'] = refls['intensity.sum.variance'] * correction**2  # propagated error
            # This corrects observations for polarization assuming 100% polarization on
            # one axis (thus the F_prime = -1.0 rather than the perpendicular axis, 1.0)
            # Polarization model as described by Kahn, Fourme, Gadet, Janin, Dumas & Andre
            # (1982) J. Appl. Cryst. 15, 330-337, equations 13 - 15.

            result.extend(refls)

        if len(reflections) > 0:
            self.logger.log(
                "Applied polarization correction. Mean intensity changed from %.2f to %.2f"
                % (flex.mean(reflections['intensity.sum.value']),
                   flex.mean(result['intensity.sum.value'])))

        self.logger.log_step_time("POLARIZATION_CORRECTION", True)
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        # Remove 's1' column from the reflection table
        from xfel.merging.application.reflection_table_utils import reflection_table_utils
        reflections = reflection_table_utils.prune_reflection_table_keys(
            reflections=result, keys_to_delete=['s1'])
        self.logger.log("Pruned reflection table")
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        return experiments, reflections
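# A compact numpy restatement of the correction applied above (Kahn, Fourme,
# Gadet, Janin, Dumas & Andre (1982) J. Appl. Cryst. 15, 330-337, eqs. 13-15),
# for a single reflection with unit vectors s0, s1 and polarization normal n:
import numpy as np

def polarization_correction(s1, s0, n, two_theta, F_prime=-1.0):
    # angle between s1 projected into the plane normal to s0 and the polarization normal
    s1_perp = s1 - np.dot(s1, s0) * s0
    cos_theta = np.dot(s1_perp, n) / np.linalg.norm(s1_perp)
    cos_two_polar = np.cos(2.0 * np.arccos(cos_theta))
    P_nought = 0.5 * (1.0 + np.cos(two_theta) ** 2)
    P_prime = 0.5 * F_prime * cos_two_polar * np.sin(two_theta) ** 2
    return 1.0 / (P_nought - P_prime)  # multiply intensities by this factor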
Example no. 10
    def run(self, all_experiments, all_reflections):
        """ Load all the data using MPI """
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex

        # Both must be none or not none
        test = [all_experiments is None, all_reflections is None].count(True)
        assert test in [0, 2]
        if test == 2:
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0
        else:
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        self.logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % (starting_expts_count, starting_refls_count))

        # Generate and send a list of file paths to each worker
        if self.mpi_helper.rank == 0:
            file_list = self.get_list()
            self.logger.log(
                "Built an input list of %d json/pickle file pairs" %
                (len(file_list)))
            self.params.input.path = None  # Rank 0 has already parsed the input parameters

            # optionally write a file list mapping to disk, useful in post processing if save_experiments_and_reflections=True
            file_id_from_names = None
            if self.params.output.expanded_bookkeeping:
                apath = os.path.abspath
                file_names_from_id = {
                    i_f: tuple(map(apath, exp_ref_pair))
                    for i_f, exp_ref_pair in enumerate(file_list)
                }
                with open(
                        os.path.join(self.params.output.output_dir,
                                     "file_list_map.json"), "w") as o:
                    json.dump(file_names_from_id, o)
                file_id_from_names = {
                    tuple(map(apath, exp_ref_pair)): i_f
                    for i_f, exp_ref_pair in enumerate(file_list)
                }

            per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
                                    calculate_file_load(available_rank_count = self.mpi_helper.size)
            self.logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                (len(per_rank_file_list)))
            transmitted = per_rank_file_list, file_id_from_names
        else:
            transmitted = None

        self.logger.log_step_time("BROADCAST_FILE_LIST")
        new_file_list, file_names_mapping = self.mpi_helper.comm.bcast(transmitted, root=0)
        new_file_list = new_file_list[self.mpi_helper.rank] \
            if self.mpi_helper.rank < len(new_file_list) else None
        self.logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Load the data
        self.logger.log_step_time("LOAD")
        if new_file_list is not None:
            self.logger.log("Received a list of %d json/pickle file pairs" %
                            len(new_file_list))
            for experiments_filename, reflections_filename in new_file_list:
                self.logger.log("Reading %s %s" %
                                (experiments_filename, reflections_filename))
                experiments = ExperimentListFactory.from_json_file(
                    experiments_filename,
                    check_format=self.params.input.read_image_headers)
                reflections = flex.reflection_table.from_file(
                    reflections_filename)
                if self.params.output.expanded_bookkeeping:
                    # NOTE: these are un-prunable
                    reflections["input_refl_index"] = flex.int(
                        list(range(len(reflections))))
                    reflections["orig_exp_id"] = reflections['id']
                    assert file_names_mapping is not None
                    exp_ref_pair = (os.path.abspath(experiments_filename),
                                    os.path.abspath(reflections_filename))
                    this_refl_fileMappings = [file_names_mapping[exp_ref_pair]] * len(reflections)
                    reflections["file_list_mapping"] = flex.int(this_refl_fileMappings)
                self.logger.log("Data read, prepping")

                if 'intensity.sum.value' in reflections:
                    reflections['intensity.sum.value.unmodified'] = \
                        reflections['intensity.sum.value'] * 1
                if 'intensity.sum.variance' in reflections:
                    reflections['intensity.sum.variance.unmodified'] = \
                        reflections['intensity.sum.variance'] * 1

                new_ids = flex.int(len(reflections), -1)
                new_identifiers = flex.std_string(len(reflections))
                eid = reflections.experiment_identifiers()
                for k in eid.keys():
                    del eid[k]

                if self.params.output.expanded_bookkeeping:
                    preGen_experiment_identifiers(experiments,
                                                  experiments_filename)
                for experiment_id, experiment in enumerate(experiments):
                    # select reflections of the current experiment
                    refls_sel = reflections['id'] == experiment_id

                    if refls_sel.count(True) == 0: continue

                    if experiment.identifier is None or len(
                            experiment.identifier) == 0:
                        experiment.identifier = create_experiment_identifier(
                            experiment, experiments_filename, experiment_id)

                    if not self.params.input.keep_imagesets:
                        experiment.imageset = None
                    all_experiments.append(experiment)

                    # Reflection experiment 'id' is unique within this rank; 'exp_id' (i.e. experiment identifier) is unique globally
                    new_identifiers.set_selected(refls_sel,
                                                 experiment.identifier)

                    new_id = len(all_experiments) - 1
                    eid[new_id] = experiment.identifier
                    new_ids.set_selected(refls_sel, new_id)
                assert (new_ids < 0).count(True) == 0, "Not all reflections accounted for"
                reflections['id'] = new_ids
                reflections['exp_id'] = new_identifiers
                all_reflections.extend(reflections)
        else:
            self.logger.log("Received a list of 0 json/pickle file pairs")
        self.logger.log_step_time("LOAD", True)

        self.logger.log('Read %d experiments consisting of %d reflections' %
                        (len(all_experiments) - starting_expts_count,
                         len(all_reflections) - starting_refls_count))
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        all_reflections = self.prune_reflection_table_keys(all_reflections)

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(all_experiments, all_reflections)
        return all_experiments, all_reflections
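# A small sketch of the expanded-bookkeeping map written above, assuming
# file_list is a list of (experiments_path, reflections_path) pairs; the
# forward map goes to file_list_map.json and the inverse map is used to tag
# each reflection with the integer id of its source file pair.
import json, os

def write_file_list_map(file_list, output_dir):
    file_names_from_id = {
        i_f: tuple(os.path.abspath(p) for p in pair)
        for i_f, pair in enumerate(file_list)
    }
    with open(os.path.join(output_dir, "file_list_map.json"), "w") as out:
        json.dump(file_names_from_id, out)
    return {pair: i_f for i_f, pair in file_names_from_id.items()}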
Example no. 11
    def run(self, all_experiments, all_reflections):
        """ Load all the data using MPI """
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex

        # Both must be none or not none
        test = [all_experiments is None, all_reflections is None].count(True)
        assert test in [0, 2]
        if test == 2:
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0
        else:
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        self.logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % (starting_expts_count, starting_refls_count))

        # Generate and send a list of file paths to each worker
        if self.mpi_helper.rank == 0:
            file_list = self.get_list()
            self.logger.log(
                "Built an input list of %d json/pickle file pairs" %
                (len(file_list)))
            self.params.input.path = None  # Rank 0 has already parsed the input parameters
            per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
                                    calculate_file_load(available_rank_count = self.mpi_helper.size)
            self.logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                (len(per_rank_file_list)))
            transmitted = per_rank_file_list
        else:
            transmitted = None

        self.logger.log_step_time("BROADCAST_FILE_LIST")
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        new_file_list = transmitted[self.mpi_helper.rank] \
            if self.mpi_helper.rank < len(transmitted) else None
        self.logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Load the data
        self.logger.log_step_time("LOAD")
        if new_file_list is not None:
            self.logger.log("Received a list of %d json/pickle file pairs" %
                            len(new_file_list))
            for experiments_filename, reflections_filename in new_file_list:
                experiments = ExperimentListFactory.from_json_file(
                    experiments_filename, check_format=False)
                reflections = flex.reflection_table.from_file(
                    reflections_filename)

                for experiment_id, experiment in enumerate(experiments):
                    if experiment.identifier is None or len(
                            experiment.identifier) == 0:
                        experiment.identifier = create_experiment_identifier(
                            experiment, experiments_filename, experiment_id)
                    all_experiments.append(experiment)
                    #experiment.identifier = "%d"%(len(all_experiments) - 1)

                    # select reflections of the current experiment
                    refls = reflections.select(
                        reflections['id'] == experiment_id)

                    # Reflection experiment 'id' is supposed to be unique within this rank; 'exp_id' (i.e. experiment identifier) is supposed to be unique globally
                    #refls['id'] = flex.size_t(len(refls), len(all_experiments)-1)
                    refls['exp_id'] = flex.std_string(len(refls),
                                                      experiment.identifier)

                    all_reflections.extend(refls)
        else:
            self.logger.log("Received a list of 0 json/pickle file pairs")
        self.logger.log_step_time("LOAD", True)

        self.logger.log('Read %d experiments consisting of %d reflections' %
                        (len(all_experiments) - starting_expts_count,
                         len(all_reflections) - starting_refls_count))
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        from xfel.merging.application.reflection_table_utils import reflection_table_utils
        all_reflections = reflection_table_utils.prune_reflection_table_keys(
            reflections=all_reflections,
            keys_to_keep=[
                'intensity.sum.value', 'intensity.sum.variance',
                'miller_index', 'miller_index_asymmetric', 'exp_id', 's1'
            ])
        self.logger.log("Pruned reflection table")
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(all_experiments, all_reflections)

        return all_experiments, all_reflections
Example no. 12
    def run(self, experiments, reflections):

        assert self.mpi_helper.size not in [2,3,4], "Please run modify_cosym on " \
            "1 or >= 5 MPI ranks."

        self.logger.log_step_time("COSYM")

        all_sampling_experiments = experiments
        all_sampling_reflections = reflections
        # because cosym has a problem with hashed identifiers, use simple experiment identifiers
        from dxtbx.model.experiment_list import ExperimentList
        sampling_experiments_for_cosym = ExperimentList()
        sampling_reflections_for_cosym = []  # a list of flex.reflection_table

        def task_a():
            # add an anchor
            if self.params.modify.cosym.anchor:
                from xfel.merging.application.model.crystal_model import crystal_model
                XM = crystal_model(params=self.params, purpose="cosym")
                model_intensities = XM.run([], [])
                from dxtbx.model import Experiment, Crystal
                from scitbx.matrix import sqr
                O = sqr(model_intensities.unit_cell().orthogonalization_matrix()).transpose().elems
                real_a = (O[0], O[1], O[2])
                real_b = (O[3], O[4], O[5])
                real_c = (O[6], O[7], O[8])
                nc = Crystal(real_a, real_b, real_c,
                             model_intensities.space_group())
                # prepend the reference model to the cosym experiment list
                sampling_experiments_for_cosym.append(Experiment(crystal=nc))
                from dials.array_family import flex

                exp_reflections = flex.reflection_table()
                exp_reflections[
                    'intensity.sum.value'] = model_intensities.data()
                exp_reflections['intensity.sum.variance'] = flex.pow(
                    model_intensities.sigmas(), 2)
                exp_reflections['miller_index'] = model_intensities.indices()
                exp_reflections[
                    'miller_index_asymmetric'] = model_intensities.indices()
                exp_reflections['flags'] = flex.size_t(
                    model_intensities.size(),
                    flex.reflection_table.flags.integrated_sum)

                # prepare individual reflection tables for each experiment

                simple_experiment_id = len(sampling_experiments_for_cosym) - 1
                #experiment.identifier = "%d"%simple_experiment_id
                sampling_experiments_for_cosym[
                    -1].identifier = "%d" % simple_experiment_id
                # experiment identifier must be a string according to *.h file
                # the identifier is changed on the _for_cosym Experiment list, not the master experiments for through analysis

                exp_reflections['id'] = flex.int(len(exp_reflections),
                                                 simple_experiment_id)
                # register the integer id as a new column in the per-experiment reflection table

                exp_reflections.experiment_identifiers(
                )[simple_experiment_id] = sampling_experiments_for_cosym[
                    -1].identifier
                #apparently the reflection table holds a map from integer id (reflection table) to string id (experiment)

                sampling_reflections_for_cosym.append(exp_reflections)

        #if self.mpi_helper.rank == 0:
        # task_a() # no anchor for initial pass

        def task_1(uuid_starting=[], mpi_helper_size=1, do_plot=False):
            self.uuid_cache = uuid_starting
            if mpi_helper_size == 1:  # simple case, one rank
                for experiment in all_sampling_experiments:
                    sampling_experiments_for_cosym.append(experiment)
                    self.uuid_cache.append(experiment.identifier)

                    exp_reflections = all_sampling_reflections.select(
                        all_sampling_reflections['exp_id'] ==
                        experiment.identifier)
                    # prepare individual reflection tables for each experiment

                    simple_experiment_id = len(
                        sampling_experiments_for_cosym) - 1
                    #experiment.identifier = "%d"%simple_experiment_id
                    sampling_experiments_for_cosym[
                        -1].identifier = "%d" % simple_experiment_id
                    # experiment identifier must be a string according to *.h file
                    # the identifier is changed on the _for_cosym Experiment list, not the master experiments for through analysis

                    exp_reflections['id'] = flex.int(len(exp_reflections),
                                                     simple_experiment_id)
                    # register the integer id as a new column in the per-experiment reflection table

                    exp_reflections.experiment_identifiers(
                    )[simple_experiment_id] = sampling_experiments_for_cosym[
                        -1].identifier
                    #apparently the reflection table holds a map from integer id (reflection table) to string id (experiment)

                    sampling_reflections_for_cosym.append(exp_reflections)
            else:  # complex case, overlap tranches for mutual coset determination
                self.mpi_helper.MPI.COMM_WORLD.barrier()
                from xfel.merging.application.modify.token_passing_left_right import token_passing_left_right
                values = token_passing_left_right((experiments, reflections))
                for tranch_experiments, tranch_reflections in values:
                    for experiment in tranch_experiments:
                        sampling_experiments_for_cosym.append(experiment)
                        self.uuid_cache.append(experiment.identifier)

                        exp_reflections = tranch_reflections.select(
                            tranch_reflections['exp_id'] ==
                            experiment.identifier)
                        # prepare individual reflection tables for each experiment

                        simple_experiment_id = len(
                            sampling_experiments_for_cosym) - 1
                        #experiment.identifier = "%d"%simple_experiment_id
                        sampling_experiments_for_cosym[
                            -1].identifier = "%d" % simple_experiment_id
                        # the experiment identifier must be a string according to the *.h file
                        # the identifier is changed on the _for_cosym experiment list, not on the master experiments list used for the full analysis

                        exp_reflections['id'] = flex.int(
                            len(exp_reflections), simple_experiment_id)
                        # register the integer id as a new column in the per-experiment reflection table

                        exp_reflections.experiment_identifiers(
                        )[simple_experiment_id] = sampling_experiments_for_cosym[
                            -1].identifier
                        #apparently the reflection table holds a map from integer id (reflection table) to string id (experiment)

                        sampling_reflections_for_cosym.append(exp_reflections)

            from dials.command_line import cosym as cosym_module
            cosym_module.logger = self.logger

            i_plot = self.mpi_helper.rank
            from xfel.merging.application.modify.aux_cosym import dials_cl_cosym_subclass as dials_cl_cosym_wrapper
            COSYM = dials_cl_cosym_wrapper(
                sampling_experiments_for_cosym,
                sampling_reflections_for_cosym,
                self.uuid_cache,
                params=self.params.modify.cosym,
                output_dir=self.params.output.output_dir,
                do_plot=do_plot,
                i_plot=i_plot)
            return COSYM

        if self.params.modify.cosym.plot.interactive:
            self.params.modify.cosym.plot.filename = None
        do_plot = (self.params.modify.cosym.plot.do_plot and
                   self.mpi_helper.rank < self.params.modify.cosym.plot.n_max)
        COSYM = task_1(mpi_helper_size=self.mpi_helper.size, do_plot=do_plot)
        self.uuid_cache = COSYM.uuid_cache  # reformed uuid list after n_refls filter

        import dials.algorithms.symmetry.cosym.target
        from xfel.merging.application.modify.aux_cosym import TargetWithFastRij
        dials.algorithms.symmetry.cosym.target.Target = TargetWithFastRij
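        # (descriptive note) module-level monkey-patch: any Target built by
        # dials.algorithms.symmetry.cosym from here on uses the faster Rij
        # implementation, presumably picked up when COSYM.run() constructs
        # its target below.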

        rank_N_refl = flex.double([r.size() for r in COSYM.reflections])
        message = """Task 1. Prepare the data for cosym
    change_of_basis_ops_to_minimum_cell
    eliminate_sys_absent
    transform models into Miller arrays, putting data in primitive triclinic reduced cell
    There are %d experiments with %d reflections, averaging %.1f reflections/experiment""" % (
            len(COSYM.experiments), flex.sum(rank_N_refl),
            flex.mean(rank_N_refl))
        self.logger.log(message)

        COSYM.run()

        from collections import OrderedDict
        # assertion sketch (not active): len(sampling_experiments_for_cosym), plus 1 if an anchor is present, should equal len(COSYM._experiments)
        keyval = [("experiment", []), ("reindex_op", []), ("coset", [])]
        raw = OrderedDict(keyval)
        print("Rank", self.mpi_helper.rank, "experiments:",
              len(sampling_experiments_for_cosym))

        for sidx in range(len(self.uuid_cache)):
            raw["experiment"].append(self.uuid_cache[sidx])

            sidx_plus = sidx

            minimum_to_input = COSYM.cb_op_to_minimum[sidx_plus].inverse()
            reindex_op = minimum_to_input * \
                           sgtbx.change_of_basis_op(COSYM.cosym_analysis.reindexing_ops[sidx_plus]) * \
                           COSYM.cb_op_to_minimum[sidx_plus]

            # Keep this block even though it is not currently used; it is needed for future assertions:
            LG = COSYM.cosym_analysis.target._lattice_group
            LGINP = LG.change_basis(
                COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(
                    minimum_to_input)
            SG = COSYM.cosym_analysis.input_space_group
            SGINP = SG.change_basis(
                COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(
                    minimum_to_input)
            CO = sgtbx.cosets.left_decomposition(LGINP, SGINP)
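            # left_decomposition partitions the lattice group into cosets of
            # the input space group: partitions[0] is the subgroup itself
            # (the identity class), and each further partition is one
            # twin-related reindexing class.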
            partitions = CO.partitions
            this_reindex_op = reindex_op.as_hkl()
            this_coset = None
            for p_no, partition in enumerate(partitions):
                partition_ops = [
                    change_of_basis_op(ip).as_hkl() for ip in partition
                ]
                if this_reindex_op in partition_ops:
                    this_coset = p_no
                    break
            assert this_coset is not None
            raw["coset"].append(this_coset)
            raw["reindex_op"].append(this_reindex_op)

        keys = list(raw.keys())
        from pandas import DataFrame as df
        data = df(raw)
        # Major assumption: the coset decompositions "CO" are identical on all ranks. It is not clear whether a runtime check is needed.
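        # One row per surviving experiment on this rank; an illustrative
        # (made-up) layout:
        #
        #        experiment reindex_op  coset
        #   0  9c1d...(uuid)     h,k,l      0
        #   1  77aa...(uuid)   -h,-k,l      1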

        # report back to rank==0 and reconcile all coset assignments
        reports = self.mpi_helper.comm.gather((data, CO), root=0)
        if self.mpi_helper.rank == 0:
            from xfel.merging.application.modify.df_cosym import reconcile_cosym_reports
            REC = reconcile_cosym_reports(reports)
            results = REC.simple_merge(voting_method="consensus")

            # at this point we have the opportunity to reconcile the results with an anchor
            # recycle the data structures for anchor determination
            if self.params.modify.cosym.anchor:
                sampling_experiments_for_cosym = ExperimentList()
                sampling_reflections_for_cosym = []
                print("ANCHOR determination")
                task_a()
                ANCHOR = task_1(
                    uuid_starting=["anchor structure"],
                    mpi_helper_size=1)  # only run on the rank==0 tranche.
                self.uuid_cache = ANCHOR.uuid_cache  # reformed uuid list after n_refls filter
                ANCHOR.run()

                keyval = [("experiment", []), ("coset", [])]
                raw = OrderedDict(keyval)
                print("Anchor", "experiments:",
                      len(sampling_experiments_for_cosym))

                anchor_op = ANCHOR.cb_op_to_minimum[0].inverse() * \
                           sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[0]) * \
                           ANCHOR.cb_op_to_minimum[0]
                anchor_coset = None
                for p_no, partition in enumerate(partitions):
                    partition_ops = [
                        change_of_basis_op(ip).as_hkl() for ip in partition
                    ]
                    if anchor_op.as_hkl() in partition_ops:
                        anchor_coset = p_no
                        break
                assert anchor_coset is not None
                print("The consensus for the anchor is", anchor_op.as_hkl(),
                      " anchor coset", anchor_coset)
                raw["experiment"].append("anchor structure")
                raw["coset"].append(anchor_coset)

                for sidx in range(1, len(self.uuid_cache)):
                    raw["experiment"].append(self.uuid_cache[sidx])

                    sidx_plus = sidx

                    minimum_to_input = ANCHOR.cb_op_to_minimum[
                        sidx_plus].inverse()
                    reindex_op = minimum_to_input * \
                             sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[sidx_plus]) * \
                             ANCHOR.cb_op_to_minimum[sidx_plus]
                    this_reindex_op = reindex_op.as_hkl()
                    this_coset = None
                    for p_no, partition in enumerate(partitions):
                        partition_ops = [
                            change_of_basis_op(ip).as_hkl() for ip in partition
                        ]
                        if this_reindex_op in partition_ops:
                            this_coset = p_no
                            break
                    assert this_coset is not None
                    raw["coset"].append(this_coset)

                from pandas import DataFrame as df
                anchor_data = df(raw)
                REC.reconcile_with_anchor(results, anchor_data, anchor_op)
                # no need for return value; results dataframe is modified in place

            if self.params.modify.cosym.dataframe:
                import os
                results.to_pickle(
                    path=os.path.join(self.params.output.output_dir,
                                      self.params.modify.cosym.dataframe))
            transmitted = results
        else:
            transmitted = None
        self.mpi_helper.comm.barrier()
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        # "transmitted" holds the global coset assignments

        # subselect expt and refl on the successful coset assignments
        # output:  experiments-->result_experiments_for_cosym; reflections-->reflections (modified in place)
        result_experiments_for_cosym = ExperimentList()
        good_refls = flex.bool(len(reflections), False)
        good_expt_id = list(transmitted["experiment"])
        good_coset = list(
            transmitted["coset"]
        )  # would like to understand how to use pandas rather than Python list
        for iexpt in range(len(experiments)):
            iexpt_id = experiments[iexpt].identifier
            keepit = iexpt_id in good_expt_id
            if keepit:
                this_coset = good_coset[good_expt_id.index(iexpt_id)]
                this_cb_op = change_of_basis_op(CO.partitions[this_coset][0])
                accepted_expt = experiments[iexpt]
                if this_coset > 0:
                    accepted_expt.crystal = MosaicCrystalSauter2014(
                        accepted_expt.crystal.change_basis(this_cb_op))
                    # need to use wrapper because of cctbx/dxtbx#5
                result_experiments_for_cosym.append(accepted_expt)
                good_refls |= reflections["exp_id"] == iexpt_id
        reflections = reflections.select(good_refls)
        self.mpi_helper.comm.barrier()
        #if self.mpi_helper.rank == 0:
        #  import pickle
        #  with open("refl.pickle","wb") as F:
        #    pickle.dump(reflections, F)
        #    pickle.dump(transmitted, F)
        #    pickle.dump([E.crystal.get_crystal_symmetry() for E in result_experiments_for_cosym],F)
        #    pickle.dump([E.identifier for E in result_experiments_for_cosym],F)
        #    pickle.dump(CO, F)

        # still have to reindex the reflection table, but try to do it efficiently
        from xfel.merging.application.modify.reindex_cosym import reindex_refl_by_coset
        reindex_refl_by_coset(
            refl=reflections,
            data=transmitted,
            symms=[
                E.crystal.get_crystal_symmetry()
                for E in result_experiments_for_cosym
            ],
            uuids=[E.identifier for E in result_experiments_for_cosym],
            co=CO,
            anomalous_flag=self.params.merging.merge_anomalous == False,
            verbose=False)
        # this should have re-indexed the refls in place, no need for return value

        self.mpi_helper.comm.barrier()
        # Note: this handles the simple case of lattice ambiguity (P63 in the P6/mmm lattice group),
        # where we assume all inputs and outputs are in P63.
        # More complex use cases would have to reset the space group in the crystal and recalculate
        # the ASU "miller_index_asymmetric" column in the reflections table.

        self.logger.log_step_time("COSYM", True)
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(result_experiments_for_cosym,
                                        reflections)
        return result_experiments_for_cosym, reflections
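
# A minimal, self-contained sketch of the coset-lookup idiom used twice in the
# worker above. The subgroup/supergroup pair P 3 / P 6 is a hypothetical
# stand-in for the SGINP/LGINP pair derived from the cosym analysis; any
# subgroup/supergroup pair would serve.
from cctbx import sgtbx
from cctbx.sgtbx import cosets

sub = sgtbx.space_group_info("P 3").group()   # stand-in for the input space group
sup = sgtbx.space_group_info("P 6").group()   # stand-in for the lattice group
CO = cosets.left_decomposition(sup, sub)
op = sgtbx.change_of_basis_op("-h,-k,l")      # candidate reindexing operator
for p_no, partition in enumerate(CO.partitions):
    if op.as_hkl() in [sgtbx.change_of_basis_op(ip).as_hkl() for ip in partition]:
        print("operator %s belongs to coset %d" % (op.as_hkl(), p_no))
        break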
Example No. 13
    def run(self, all_experiments, all_reflections):
        """ Load all the data using MPI """
        from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory
        from dials.array_family import flex
        import numpy as np  # needed for the np.where selection workaround below

        # Both must be None or both must be non-None
        test = [all_experiments is None, all_reflections is None].count(True)
        assert test in [0, 2]
        if test == 2:
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0
        else:
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        self.logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % (starting_expts_count, starting_refls_count))

        # Generate and send a list of file paths to each worker
        if self.mpi_helper.rank == 0:
            file_list = self.get_list()
            self.logger.log(
                "Built an input list of %d json/pickle file pairs" %
                (len(file_list)))
            self.params.input.path = None  # Rank 0 has already parsed the input parameters
            per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
                                    calculate_file_load(available_rank_count = self.mpi_helper.size)
            self.logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                (len(per_rank_file_list)))
            transmitted = per_rank_file_list
        else:
            transmitted = None

        self.logger.log_step_time("BROADCAST_FILE_LIST")
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        new_file_list = transmitted[self.mpi_helper.rank] \
            if self.mpi_helper.rank < len(transmitted) else None
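        # (descriptive note) when there are more ranks than file-load buckets,
        # the surplus ranks get None here and skip the LOAD loop below.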
        self.logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Load the data
        self.logger.log_step_time("LOAD")
        if new_file_list is not None:
            self.logger.log("Received a list of %d json/pickle file pairs" %
                            len(new_file_list))
            for experiments_filename, reflections_filename in new_file_list:
                experiments = ExperimentListFactory.from_json_file(
                    experiments_filename, check_format=False)
                reflections = flex.reflection_table.from_file(
                    reflections_filename)
                # NOTE: slicing has to be used below because selection by 'id' no longer works (see the FIXME further down)
                reflections.sort("id")
                unique_refl_ids = set(reflections['id'])
                assert len(unique_refl_ids) == len(experiments), \
                    "reflection table and experiment list should describe the same experiments"  # TODO: decide if this is true
                assert min(reflections["id"]) >= 0, \
                    "the id column must not contain -1; ideally it is the numerical index of the experiment, but beware that this is not enforced anywhere in the upstream code base"

                if 'intensity.sum.value' in reflections:
                    reflections[
                        'intensity.sum.value.unmodified'] = reflections[
                            'intensity.sum.value'] * 1
                if 'intensity.sum.variance' in reflections:
                    reflections[
                        'intensity.sum.variance.unmodified'] = reflections[
                            'intensity.sum.variance'] * 1
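                # multiplying by 1 forces a fresh flex array, presumably so the
                # .unmodified copies survive later modification of the originals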

                for experiment_id, experiment in enumerate(experiments):
                    if experiment.identifier is None or len(
                            experiment.identifier) == 0:
                        experiment.identifier = create_experiment_identifier(
                            experiment, experiments_filename, experiment_id)

                    all_experiments.append(experiment)

                    # select reflections of the current experiment
                    # FIXME: the selection was broken for me, it raised
                    #    RuntimeError: boost::bad_get: failed value get using boost::get
                    #refls = reflections.select(reflections['id'] == experiment_id)
                    # NOTE: the slice below is a workaround for the broken experiment_id selection above;
                    # it is equivalent only because the table was sorted by 'id' earlier, so each experiment's rows are contiguous
                    exp_id_pos = np.where(
                        reflections['id'] == experiment_id)[0]
                    assert exp_id_pos.size, "no refls in this experiment"  # NOTE: maybe we can relax this assertion ?
                    refls = reflections[exp_id_pos[0]:exp_id_pos[-1] + 1]

                    #FIXME: how will this work if reading in multiple composite mode experiment jsons?
                    # Reflection experiment 'id' is supposed to be unique within this rank; 'exp_id' (i.e. experiment identifier) is supposed to be unique globally
                    refls['exp_id'] = flex.std_string(len(refls),
                                                      experiment.identifier)

                    new_id = 0
                    if len(all_reflections) > 0:
                        new_id = max(all_reflections['id']) + 1

                    # FIXME: it is hard to interpret a function call that returns a mutable property
                    eid = refls.experiment_identifiers()
                    for k in eid.keys():
                        del eid[k]
                    eid[new_id] = experiment.identifier
                    refls['id'] = flex.int(len(refls), new_id)
                    all_reflections.extend(refls)
        else:
            self.logger.log("Received a list of 0 json/pickle file pairs")
        self.logger.log_step_time("LOAD", True)

        self.logger.log('Read %d experiments consisting of %d reflections' %
                        (len(all_experiments) - starting_expts_count,
                         len(all_reflections) - starting_refls_count))
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        from xfel.merging.application.reflection_table_utils import reflection_table_utils
        all_reflections = reflection_table_utils.prune_reflection_table_keys(
            reflections=all_reflections,
            keys_to_keep=[
                'intensity.sum.value', 'intensity.sum.variance',
                'miller_index', 'miller_index_asymmetric', 'exp_id', 's1',
                'intensity.sum.value.unmodified',
                'intensity.sum.variance.unmodified'
            ])
        self.logger.log("Pruned reflection table")
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(all_experiments, all_reflections)

        return all_experiments, all_reflections
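
# A minimal sketch of the id-renumbering idiom above, using a made-up
# identifier string; the integer 'id' column and the experiment_identifiers()
# map must be kept in sync.
from dials.array_family import flex

refls = flex.reflection_table()
refls['id'] = flex.int(3, 0)            # three reflections, file-local id 0
eid = refls.experiment_identifiers()    # mutable int -> str map on the table
eid[0] = "hypothetical-uuid"
# renumber to a rank-wide id (here 7), keeping the map consistent
del eid[0]
eid[7] = "hypothetical-uuid"
refls['id'] = flex.int(len(refls), 7)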
Example No. 14
    def run(self, experiments, reflections):

        self.logger.log_step_time("REINDEX")

        # Get list of twinning operators for this space group
        operators = twin_laws(self.params.scaling.i_model).operators
        if not operators:
            self.logger.log("No indexing ambiguity. Skipping this step.")
            return experiments, reflections
        self.logger.log("Resolving indexing ambiguity using operators h,k,l, %s"%", ".join( \
          [op.operator.r().as_hkl() for op in operators]))
        operators = [
            sgtbx.change_of_basis_op(op.operator.r().as_hkl())
            for op in operators
        ]

        result = flex.reflection_table()
        scaler = experiment_scaler(self.params, self.mpi_helper, self.logger)
        model_intensities = self.params.scaling.i_model
        target_symm = symmetry(
            unit_cell=self.params.scaling.unit_cell,
            space_group_info=self.params.scaling.space_group)

        def get_correlation(cb_op=None):
            """ Helper function to get CC to the reference given an operator """
            # Build a miller array for the experiment reflections
            exp_miller_indices = miller.set(
                target_symm, exp_reflections['miller_index_asymmetric'], True)
            exp_intensities = miller.array(
                exp_miller_indices, exp_reflections['intensity.sum.value'],
                flex.sqrt(exp_reflections['intensity.sum.variance']))
            if cb_op:
                exp_intensities = exp_intensities.change_basis(
                    cb_op).map_to_asu()

            # Extract an array of HKLs from the model to match the experiment HKLs
            matching_indices = miller.match_multi_indices(
                miller_indices_unique=model_intensities.indices(),
                miller_indices=exp_intensities.indices())

            # Least squares
            scaling_result = scaler.fit_experiment_to_reference(
                model_intensities, exp_intensities, matching_indices)
            return scaling_result.correlation if scaling_result.correlation is not None else -1

        # Test each experiment to see if an operator gives a better CC to the reference, and if it does, apply it
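        # (descriptive note) get_correlation above closes over exp_reflections,
        # which is rebound on each pass of this loop (a late-binding closure),
        # so the helper is only valid once exp_reflections has been assigned.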
        for expt_id, experiment in enumerate(experiments):
            exp_reflections = reflections.select(
                reflections['exp_id'] == experiment.identifier)
            all_correlations = []
            best_correlation = get_correlation()
            all_correlations.append(best_correlation)
            best_op = None
            for cb_op in operators:
                test_correlation = get_correlation(cb_op)
                all_correlations.append(test_correlation)
                if test_correlation > best_correlation:
                    best_correlation = test_correlation
                    best_op = cb_op
            if best_op:
                exp_miller_indices = miller.set(
                    target_symm, exp_reflections['miller_index'],
                    True).change_basis(best_op)
                exp_reflections[
                    'miller_index_asymmetric'] = exp_miller_indices.map_to_asu(
                    ).indices()
                exp_reflections['miller_index'] = exp_miller_indices.indices()
                experiment.crystal = MosaicCrystalSauter2014(
                    experiment.crystal.change_basis(best_op)
                )  # need to use wrapper because of cctbx/dxtbx#5
            result.extend(exp_reflections)

            self.logger.log(
                "Expt %d, reindexing op correlations: %s" %
                (expt_id, ", ".join(["%6.3f" % c for c in all_correlations])))

        self.logger.log_step_time("REINDEX", True)
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        return experiments, result
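
# A minimal sketch (made-up symmetry and data) of the reindex-and-compare step
# inside get_correlation above: apply a candidate twin operator, map the
# indices to the asymmetric unit, and the result can then be matched against
# the reference intensities.
from cctbx import miller, sgtbx
from cctbx.crystal import symmetry
from cctbx.array_family import flex

symm = symmetry(unit_cell=(78, 78, 37, 90, 90, 90), space_group_symbol="P 43")
ms = miller.set(symm, flex.miller_index([(1, 2, 3), (3, 1, 2)]), True)
data = miller.array(ms, flex.double([10.0, 12.0]), flex.double([1.0, 1.1]))
cb_op = sgtbx.change_of_basis_op("k,h,-l")  # the merohedral twin law for point group 4
reindexed = data.change_basis(cb_op).map_to_asu()
print(list(reindexed.indices()))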