def run(self, experiments, reflections):
    """Rebalance the experiments and reflections held by this rank.

    The strategy is read from ``params.input.parallel_file_load.balance``:

    * ``"global"``   -- redistribute over all ranks of the main communicator.
    * ``"per_node"`` -- split the main communicator into groups of
      ``ranks_per_node`` consecutive ranks and redistribute inside each group.

    Args:
        experiments: experiments currently held by this rank.
        reflections: reflections currently held by this rank.

    Returns:
        Tuple ``(new_experiments, new_reflections)`` as produced by
        ``self.distribute_over_ranks``.

    Raises:
        ValueError: if the balance method is neither "global" nor "per_node".
            (Previously this surfaced later as an UnboundLocalError.)
    """
    load_params = self.params.input.parallel_file_load
    balance = load_params.balance

    message = "Rebalancing input load -- %s method..." % balance
    self.logger.log(message)
    if self.mpi_helper.rank == 0:
        self.logger.main_log(message)

    if balance == "global":
        new_experiments, new_reflections = self.distribute_over_ranks(
            experiments, reflections, self.mpi_helper.comm,
            self.mpi_helper.size)
    elif balance == "per_node":
        ranks_per_node = load_params.ranks_per_node
        # Floor division keeps the colour an exact integer (no float round-trip).
        mpi_color = self.mpi_helper.rank // ranks_per_node
        mpi_new_rank = self.mpi_helper.rank % ranks_per_node
        mpi_split_comm = self.mpi_helper.comm.Split(mpi_color, mpi_new_rank)
        new_experiments, new_reflections = self.distribute_over_ranks(
            experiments, reflections, mpi_split_comm, ranks_per_node)
    else:
        raise ValueError(
            "Unknown input.parallel_file_load.balance method: %r" % (balance,))

    # Do we have any data?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(new_experiments, new_reflections)

    return new_experiments, new_reflections
def run(self, experiments, reflections):
    """Apply the significance filter when it is requested, then count the data.

    Args:
        experiments: experiments held by this rank.
        reflections: reflections held by this rank.

    Returns:
        Tuple ``(experiments, reflections)``; these are the outputs of
        ``self.apply_significance_filter`` when 'significance_filter' is listed
        in ``params.select.algorithm``, otherwise the inputs unchanged.
    """
    requested_algorithms = self.params.select.algorithm
    if 'significance_filter' in requested_algorithms:
        filtered = self.apply_significance_filter(experiments, reflections)
        experiments, reflections = filtered

    # Do we have any data left?
    from xfel.merging.application.utils.data_counter import data_counter
    counter = data_counter(self.params)
    counter.count(experiments, reflections)

    return experiments, reflections
def run(self, experiments, reflections):
    """Rebalance the data held by this rank and optionally renumber the 'id' column.

    The strategy is read from ``params.input.parallel_file_load.balance``:

    * ``"global"``   -- redistribute over all ranks of the main communicator.
    * ``"per_node"`` -- split the main communicator into groups of
      ``ranks_per_node`` consecutive ranks and redistribute inside each group.

    When ``reset_experiment_id_column`` is set, every reflection's integer 'id'
    is rewritten to the position of its experiment (looked up through the
    'exp_id' identifier column) in the rebalanced experiment list, and the
    table's experiment-identifier map is updated to match.

    Args:
        experiments: experiments currently held by this rank.
        reflections: reflections currently held by this rank.

    Returns:
        Tuple ``(new_experiments, new_reflections)``.

    Raises:
        ValueError: if the balance method is neither "global" nor "per_node".
            (Previously this surfaced later as an UnboundLocalError.)
        KeyError: if a reflection's 'exp_id' matches no rebalanced experiment.
    """
    load_params = self.params.input.parallel_file_load
    balance = load_params.balance

    message = "Rebalancing input load -- %s method..." % balance
    self.logger.log(message)
    if self.mpi_helper.rank == 0:
        self.logger.main_log(message)

    if balance == "global":
        new_experiments, new_reflections = self.distribute_over_ranks(
            experiments, reflections, self.mpi_helper.comm,
            self.mpi_helper.size)
    elif balance == "per_node":
        ranks_per_node = load_params.ranks_per_node
        # Floor division keeps the colour an exact integer (no float round-trip).
        mpi_color = self.mpi_helper.rank // ranks_per_node
        mpi_new_rank = self.mpi_helper.rank % ranks_per_node
        mpi_split_comm = self.mpi_helper.comm.Split(mpi_color, mpi_new_rank)
        new_experiments, new_reflections = self.distribute_over_ranks(
            experiments, reflections, mpi_split_comm, ranks_per_node)
    else:
        raise ValueError(
            "Unknown input.parallel_file_load.balance method: %r" % (balance,))

    if load_params.reset_experiment_id_column:
        self.logger.log('Starting id column reset')
        id_map = new_reflections.experiment_identifiers()
        # NOTE(review): entries already present in id_map for ids beyond the
        # new experiment count are left untouched -- confirm that is intended.
        reverse_map = {}
        for expt_id, experiment in enumerate(new_experiments):
            id_map[expt_id] = experiment.identifier
            reverse_map[experiment.identifier] = expt_id
        id_col = new_reflections['id']
        ident_col = new_reflections['exp_id']
        # id_col is written element-wise; the table is expected to see the
        # change through this column handle (same assumption as before).
        for row, identifier in enumerate(ident_col):
            id_col[row] = reverse_map[identifier]
        self.logger.log('Column reset done')

    # Do we have any data?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(new_experiments, new_reflections)

    return new_experiments, new_reflections
def run(self, experiments, reflections):
    """Scale every experiment against the reference intensities.

    Each experiment is fitted to ``params.scaling.i_model`` with
    ``self.fit_experiment_to_reference``. Experiments whose fit reports low
    signal or low correlation are dropped together with their reflections.
    Unless postrefinement is enabled, the fitted slope is applied to the
    intensities (and its square to the variances). Per-rank statistics are
    logged locally and their MPI-reduced totals are logged on rank 0.

    Args:
        experiments: experiments held by this rank.
        reflections: reflections held by this rank (needs the 'exp_id' column).

    Returns:
        Tuple ``(experiments, reflections)`` of accepted data, or the inputs
        unchanged when ``params.scaling.algorithm`` is not "mark0".
    """
    self.logger.log_step_time("SCALE_FRAMES")

    # Anything but "mark0" (e.g. mark1) implies no scaling/post-refinement.
    if self.params.scaling.algorithm != "mark0":
        self.logger.log("No scaling was done")
        if self.mpi_helper.rank == 0:
            self.logger.main_log("No scaling was done")
        return experiments, reflections

    scaled_experiments = ExperimentList()
    scaled_reflections = flex.reflection_table()

    # Scale experiments one at a time; reject those that do not correlate
    # with the reference or fail to scale.
    slopes = []
    correlations = []
    n_high_res = 0
    n_low_signal = 0
    n_low_correlation = 0

    target_symm = symmetry(
        unit_cell=self.params.scaling.unit_cell,
        space_group_info=self.params.scaling.space_group)

    for experiment in experiments:
        belongs_to_experiment = reflections['exp_id'] == experiment.identifier
        exp_reflections = reflections.select(belongs_to_experiment)

        # Miller array of this experiment's observations; sigma = sqrt(variance).
        asu_set = miller.set(
            target_symm, exp_reflections['miller_index_asymmetric'], True)
        sigmas = flex.double(
            flex.sqrt(exp_reflections['intensity.sum.variance']))
        exp_intensities = miller.array(
            asu_set, exp_reflections['intensity.sum.value'], sigmas)

        model_intensities = self.params.scaling.i_model

        # Map the experiment HKLs onto the model HKLs.
        matching_indices = miller.match_multi_indices(
            miller_indices_unique=model_intensities.indices(),
            miller_indices=exp_intensities.indices())

        # Least-squares fit of the experiment to the reference.
        fit = self.fit_experiment_to_reference(
            model_intensities, exp_intensities, matching_indices)

        if fit.error == scaling_result.err_low_signal:
            n_low_signal += 1
            continue
        if fit.error == scaling_result.err_low_correlation:
            n_low_correlation += 1
            continue

        slopes.append(fit.slope)
        correlations.append(fit.correlation)

        if self.params.output.log_level == 0:
            self.logger.log(
                "Experiment ID: %s; Slope: %f; Correlation %f" %
                (experiment.identifier, fit.slope, fit.correlation))

        # Count high-resolution experiments.
        if exp_intensities.d_min() <= self.params.merging.d_min:
            n_high_res += 1

        # Apply scale factors (left to postrefinement when that is enabled).
        if not self.params.postrefinement.enable:
            exp_reflections['intensity.sum.value'] *= fit.slope
            exp_reflections['intensity.sum.variance'] *= (fit.slope**2)

        scaled_experiments.append(experiment)
        scaled_reflections.extend(exp_reflections)

    n_rejected = len(experiments) - len(scaled_experiments)
    assert n_rejected == n_low_signal + n_low_correlation

    n_reflections_removed = reflections.size() - scaled_reflections.size()

    not_applied_note = "Note: scale factors were not applied, because postrefinement is enabled"

    local_report = (
        ("Experiments rejected because of low signal: %d", n_low_signal),
        ("Experiments rejected because of low correlation with reference: %d",
         n_low_correlation),
        ("Reflections rejected because of rejected experiments: %d",
         n_reflections_removed),
        ("High resolution experiments: %d", n_high_res),
    )
    for template, value in local_report:
        self.logger.log(template % value)
    if self.params.postrefinement.enable:
        self.logger.log(not_applied_note)

    # MPI-reduce all counts; the call order must be identical on every rank.
    comm = self.mpi_helper.comm
    MPI = self.mpi_helper.MPI
    total_low_signal = comm.reduce(n_low_signal, MPI.SUM, 0)
    total_low_correlation = comm.reduce(n_low_correlation, MPI.SUM, 0)
    total_reflections_removed = comm.reduce(n_reflections_removed, MPI.SUM, 0)
    total_high_res = comm.reduce(n_high_res, MPI.SUM, 0)
    all_slopes = comm.reduce(slopes, MPI.SUM, 0)
    all_correlations = comm.reduce(correlations, MPI.SUM, 0)

    # Rank 0: log data statistics.
    if self.mpi_helper.rank == 0:
        main_log = self.logger.main_log
        main_log('Experiments rejected because of low signal: %d' %
                 total_low_signal)
        main_log(
            'Experiments rejected because of low correlation with reference: %d'
            % total_low_correlation)
        main_log('Reflections rejected because of rejected experiments: %d' %
                 total_reflections_removed)
        main_log(
            'Experiments with high resolution of %5.2f Angstrom or better: %d'
            % (self.params.merging.d_min, total_high_res))

        if len(all_slopes) > 0:
            slope_stats = flex.mean_and_variance(flex.double(all_slopes))
            main_log('Average experiment scale factor wrt reference: %f' %
                     (slope_stats.mean()))

        if len(all_correlations) > 0:
            correlation_stats = flex.mean_and_variance(
                flex.double(all_correlations))
            main_log(
                'Average experiment correlation with reference: %f +/- %f' %
                (correlation_stats.mean(),
                 correlation_stats.unweighted_sample_standard_deviation()))

        if self.params.postrefinement.enable:
            main_log(not_applied_note)

    self.logger.log_step_time("SCALE_FRAMES", True)

    # Do we have any data left?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(scaled_experiments, scaled_reflections)

    return scaled_experiments, scaled_reflections
def run(self, all_experiments, all_reflections):
    """Load this rank's share of the experiment/reflection file pairs.

    Rank 0 builds the list of file pairs with ``self.get_list()``, splits it
    into one list per rank with ``file_load_calculator`` and broadcasts the
    result. Every rank then reads the pairs at its own index (if any),
    re-keys the reflections and appends everything to the running totals.

    Args:
        all_experiments: experiments accumulated so far, or None.
        all_reflections: reflections accumulated so far, or None. Both
            arguments must be None or both non-None; when both are None,
            fresh empty containers are created.

    Returns:
        Tuple ``(all_experiments, all_reflections)``; the reflection table is
        the one returned by ``self.prune_reflection_table_keys``.

    Raises:
        AssertionError: if exactly one argument is None, or if some
            reflections in a file match no experiment in that file.
    """
    from dxtbx.model.experiment_list import ExperimentList
    from dials.array_family import flex

    # Both must be none or not none
    test = [all_experiments is None, all_reflections is None].count(True)
    assert test in [0, 2]
    if test == 2:
        all_experiments = ExperimentList()
        all_reflections = flex.reflection_table()
        starting_expts_count = starting_refls_count = 0
    else:
        starting_expts_count = len(all_experiments)
        starting_refls_count = len(all_reflections)
    self.logger.log(
        "Initial number of experiments: %d; Initial number of reflections: %d"
        % (starting_expts_count, starting_refls_count))

    # Generate and send a list of file paths to each worker
    if self.mpi_helper.rank == 0:
        file_list = self.get_list()
        self.logger.log(
            "Built an input list of %d json/pickle file pairs" %
            (len(file_list)))
        self.params.input.path = None  # Rank 0 has already parsed the input parameters
        per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
            calculate_file_load(available_rank_count = self.mpi_helper.size)
        self.logger.log(
            'Transmitting a list of %d lists of json/pickle file pairs' %
            (len(per_rank_file_list)))
        transmitted = per_rank_file_list
    else:
        transmitted = None

    self.logger.log_step_time("BROADCAST_FILE_LIST")
    # Collective call: every rank must reach this bcast.
    transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
    # Ranks beyond the number of per-rank lists get nothing to load.
    new_file_list = transmitted[
        self.mpi_helper.
        rank] if self.mpi_helper.rank < len(transmitted) else None
    self.logger.log_step_time("BROADCAST_FILE_LIST", True)

    # Load the data
    self.logger.log_step_time("LOAD")
    if new_file_list is not None:
        self.logger.log("Received a list of %d json/pickle file pairs" %
                        len(new_file_list))
        for experiments_filename, reflections_filename in new_file_list:
            self.logger.log("Reading %s %s" %
                            (experiments_filename, reflections_filename))
            experiments = ExperimentListFactory.from_json_file(
                experiments_filename, check_format=False)
            reflections = flex.reflection_table.from_file(
                reflections_filename)
            self.logger.log("Data read, prepping")

            # Keep the as-read intensities/variances in '.unmodified' columns.
            # NOTE(review): '* 1' presumably forces a copy so later in-place
            # edits do not touch the backup -- confirm.
            if 'intensity.sum.value' in reflections:
                reflections[
                    'intensity.sum.value.unmodified'] = reflections[
                        'intensity.sum.value'] * 1
            if 'intensity.sum.variance' in reflections:
                reflections[
                    'intensity.sum.variance.unmodified'] = reflections[
                        'intensity.sum.variance'] * 1

            # -1 marks "not yet assigned"; checked by the assert below.
            new_ids = flex.int(len(reflections), -1)
            new_identifiers = flex.std_string(len(reflections))
            # Drop the id -> identifier entries that came with the file;
            # they are rebuilt below with the new ids.
            eid = reflections.experiment_identifiers()
            for k in eid.keys():
                del eid[k]
            for experiment_id, experiment in enumerate(experiments):
                # select reflections of the current experiment
                refls_sel = reflections['id'] == experiment_id

                # Experiments without reflections are skipped entirely.
                if refls_sel.count(True) == 0:
                    continue

                # Make sure every kept experiment has a non-empty identifier.
                if experiment.identifier is None or len(
                        experiment.identifier) == 0:
                    experiment.identifier = create_experiment_identifier(
                        experiment, experiments_filename, experiment_id)

                if not self.params.input.keep_imagesets:
                    experiment.imageset = None
                all_experiments.append(experiment)

                # Reflection experiment 'id' is unique within this rank; 'exp_id' (i.e. experiment identifier) is unique globally
                new_identifiers.set_selected(refls_sel, experiment.identifier)

                # The new id is the experiment's position in all_experiments.
                new_id = len(all_experiments) - 1
                eid[new_id] = experiment.identifier
                new_ids.set_selected(refls_sel, new_id)
            assert (new_ids < 0
                    ).count(True) == 0, "Not all reflections accounted for"
            reflections['id'] = new_ids
            reflections['exp_id'] = new_identifiers
            all_reflections.extend(reflections)
    else:
        self.logger.log("Received a list of 0 json/pickle file pairs")
    self.logger.log_step_time("LOAD", True)

    self.logger.log('Read %d experiments consisting of %d reflections' %
                    (len(all_experiments) - starting_expts_count,
                     len(all_reflections) - starting_refls_count))
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    all_reflections = self.prune_reflection_table_keys(all_reflections)

    # Do we have any data?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(all_experiments, all_reflections)
    return all_experiments, all_reflections
def run(self, experiments, reflections):
    """Post-refine every experiment against the reference intensities.

    For each experiment the observations are matched to
    ``params.scaling.miller_set``, an initial parameter vector is built for the
    selected algorithm ("rs" or "eta_deff"), and ``self.run_plain()`` performs
    the refinement. Experiments whose refinement raises AssertionError,
    ValueError or RuntimeError are rejected and counted per ``repr`` of the
    exception. Accepted experiments contribute a new reflection table holding
    the refined intensities/variances plus the '.unmodified' columns.

    Args:
        experiments: experiments held by this rank.
        reflections: reflections held by this rank. Columns read here:
            'exp_id', 'miller_index', 'miller_index_asymmetric',
            'intensity.sum.value', 'intensity.sum.variance',
            'intensity.sum.value.unmodified', 'intensity.sum.variance.unmodified'.

    Returns:
        Tuple ``(new_experiments, new_reflections)`` of accepted data, or the
        inputs unchanged when postrefinement is disabled or
        ``params.scaling.algorithm`` is not "mark0".

    Raises:
        AssertionError: if the reference model and Miller set disagree, or an
            internal consistency check outside the refinement itself fails.
    """
    self.logger.log_step_time("POSTREFINEMENT")
    # NOTE(review): the early return below leaves the "POSTREFINEMENT" step
    # timer without its closing call -- confirm that is intended.
    if (not self.params.postrefinement.enable) or (
            self.params.scaling.algorithm != "mark0"):
        # mark1 implies no scaling/post-refinement
        self.logger.log("No post-refinement was done")
        if self.mpi_helper.rank == 0:
            self.logger.main_log("No post-refinement was done")
        return experiments, reflections

    target_symm = symmetry(
        unit_cell=self.params.scaling.unit_cell,
        space_group_info=self.params.scaling.space_group)
    i_model = self.params.scaling.i_model
    miller_set = self.params.scaling.miller_set

    # Ensure that match_multi_indices() will return identical results
    # when a frame's observations are matched against the
    # pre-generated Miller set, miller_set, and the reference
    # data set, i_model. The implication is that the same match
    # can be used to map Miller indices to array indices for intensity
    # accumulation, and for determination of the correlation
    # coefficient in the presence of a scaling reference.
    assert len(i_model.indices()) == len(miller_set.indices())
    assert (i_model.indices() == miller_set.indices()).count(False) == 0

    new_experiments = ExperimentList()
    new_reflections = flex.reflection_table()

    experiments_rejected_by_reason = {}  # reason:how_many_rejected

    for experiment in experiments:
        exp_reflections = reflections.select(
            reflections['exp_id'] == experiment.identifier)

        # Build a miller array with _original_ miller indices of the experiment reflections
        exp_miller_indices_original = miller.set(
            target_symm, exp_reflections['miller_index'],
            not self.params.merging.merge_anomalous)
        observations_original_index = miller.array(
            exp_miller_indices_original,
            exp_reflections['intensity.sum.value'],
            flex.sqrt(exp_reflections['intensity.sum.variance']))

        assert exp_reflections.size() == exp_miller_indices_original.size()
        assert observations_original_index.size(
        ) == exp_miller_indices_original.size()

        # Build a miller array with _asymmetric_ miller indices of the experiment reflections
        exp_miller_indices_asu = miller.set(
            target_symm, exp_reflections['miller_index_asymmetric'], True)
        observations = miller.array(
            exp_miller_indices_asu, exp_reflections['intensity.sum.value'],
            flex.sqrt(exp_reflections['intensity.sum.variance']))

        matches = miller.match_multi_indices(
            miller_indices_unique=miller_set.indices(),
            miller_indices=observations.indices())

        # pair[1] refers to the observations, pair[0] to the model
        pair1 = flex.int([pair[1] for pair in matches.pairs()])

        # narrow things down to the set that matches, only
        observations_pair1_selected = observations.customized_copy(
            indices=flex.miller_index(
                [observations.indices()[p] for p in pair1]),
            data=flex.double([observations.data()[p] for p in pair1]),
            sigmas=flex.double([observations.sigmas()[p] for p in pair1]))

        observations_original_index_pair1_selected = observations_original_index.customized_copy(
            indices=flex.miller_index(
                [observations_original_index.indices()[p] for p in pair1]),
            data=flex.double(
                [observations_original_index.data()[p] for p in pair1]),
            sigmas=flex.double(
                [observations_original_index.sigmas()[p] for p in pair1]))

        I_observed = observations_pair1_selected.data()
        MILLER = observations_original_index_pair1_selected.indices()

        ORI = crystal_orientation(experiment.crystal.get_A(),
                                  basis_type.reciprocal)
        Astar = matrix.sqr(ORI.reciprocal_matrix())
        Astar_from_experiment = matrix.sqr(experiment.crystal.get_A())
        assert Astar == Astar_from_experiment

        WAVE = experiment.beam.get_wavelength()
        BEAM = matrix.col((0.0, 0.0, -1. / WAVE))
        BFACTOR = 0.
        MOSAICITY_DEG = experiment.crystal.get_half_mosaicity_deg()
        DOMAIN_SIZE_A = experiment.crystal.get_domain_size_ang()

        # calculation of correlation here
        I_reference = flex.double(
            [i_model.data()[pair[0]] for pair in matches.pairs()])
        # Model entries with a negative sigma get zero weight below.
        I_invalid = flex.bool(
            [i_model.sigmas()[pair[0]] < 0. for pair in matches.pairs()])
        use_weights = False  # New facility for getting variance-weighted correlation

        if use_weights:
            # variance weighting
            I_weight = flex.double([
                1. / (observations_pair1_selected.sigmas()[pair[1]])**2
                for pair in matches.pairs()
            ])
        else:
            I_weight = flex.double(
                len(observations_pair1_selected.sigmas()), 1.)

        I_weight.set_selected(I_invalid, 0.)

        # Explanation of 'include_negatives' semantics as originally
        # implemented in cxi.merge postrefinement:
        #   include_negatives = True
        #     + and - reflections both used for Rh distribution for initial
        #       estimate of RS parameter
        #     + and - reflections both used for calc/obs correlation slope for
        #       initial estimate of G parameter
        #     + and - reflections both passed to the refinery and used in the
        #       target function (makes sense if you look at it from a certain
        #       point of view)
        #   include_negatives = False
        #     + and - reflections both used for Rh distribution for initial
        #       estimate of RS parameter
        #     +       reflections only used for calc/obs correlation slope for
        #       initial estimate of G parameter
        #     + and - reflections both passed to the refinery and used in the
        #       target function (makes sense if you look at it from a certain
        #       point of view)
        # NOTE: by the new design, "include negatives" is always True

        SWC = simple_weighted_correlation(I_weight, I_reference, I_observed)
        if self.params.output.log_level == 0:
            self.logger.log("Old correlation is: %f" % SWC.corr)

        # NOTE(review): any algorithm other than "rs"/"eta_deff" leaves
        # `refinery` unbound below -- presumably excluded by parameter
        # validation; confirm.
        if self.params.postrefinement.algorithm == "rs":
            Rhall = flex.double()

            for mill in MILLER:
                H = matrix.col(mill)
                Xhkl = Astar * H
                Rh = (Xhkl + BEAM).length() - (1. / WAVE)
                Rhall.append(Rh)

            Rs = math.sqrt(flex.mean(Rhall * Rhall))

            RS = 1. / 10000.  # reciprocal effective domain size of 1 micron
            RS = Rs  # try this empirically determined approximate, monochrome, a-mosaic value
            current = flex.double([SWC.slope, BFACTOR, RS, 0., 0.])

            parameterization_class = rs_parameterization
            refinery = rs_refinery(ORI=ORI,
                                   MILLER=MILLER,
                                   BEAM=BEAM,
                                   WAVE=WAVE,
                                   ICALCVEC=I_reference,
                                   IOBSVEC=I_observed)

        elif self.params.postrefinement.algorithm == "eta_deff":
            eta_init = 2. * MOSAICITY_DEG * math.pi / 180.
            D_eff_init = 2. * DOMAIN_SIZE_A
            current = flex.double(
                [SWC.slope, BFACTOR, eta_init, 0., 0., D_eff_init])

            parameterization_class = eta_deff_parameterization
            refinery = eta_deff_refinery(ORI=ORI,
                                         MILLER=MILLER,
                                         BEAM=BEAM,
                                         WAVE=WAVE,
                                         ICALCVEC=I_reference,
                                         IOBSVEC=I_observed)

        func = refinery.fvec_callable(parameterization_class(current))
        functional = flex.sum(func * func)

        if self.params.output.log_level == 0:
            self.logger.log("functional: %f" % functional)

        # State consumed by self.run_plain() / self.result_for_cxi_merge().
        self.current = current
        self.parameterization_class = parameterization_class
        self.refinery = refinery

        self.observations_pair1_selected = observations_pair1_selected
        self.observations_original_index_pair1_selected = observations_original_index_pair1_selected

        error_detected = False

        try:
            self.run_plain()

            result_observations_original_index, result_observations, result_matches = self.result_for_cxi_merge(
            )

            assert result_observations_original_index.size(
            ) == result_observations.size()
            assert result_matches.pairs().size(
            ) == result_observations_original_index.size()

        except (AssertionError, ValueError, RuntimeError) as e:
            # A failed refinement rejects the experiment; tally it by reason.
            error_detected = True
            reason = repr(e)
            if not reason:
                reason = "Unknown error"
            experiments_rejected_by_reason[reason] = \
                experiments_rejected_by_reason.get(reason, 0) + 1

        if not error_detected:
            new_experiments.append(experiment)

            new_exp_reflections = flex.reflection_table()
            new_exp_reflections[
                'miller_index_asymmetric'] = result_observations.indices()
            new_exp_reflections[
                'intensity.sum.value'] = result_observations.data()
            new_exp_reflections['intensity.sum.variance'] = flex.pow(
                result_observations.sigmas(), 2)
            new_exp_reflections['exp_id'] = flex.std_string(
                len(new_exp_reflections), experiment.identifier)

            # The original reflection table, i.e. the input to this run() method, has more columns than those used
            # for the postrefinement ("data" and "sigma" in the miller arrays). The problem is: some of the input
            # reflections may have been rejected by now. So to bring those extra columns over to the new reflection
            # table, we have to create a subset of the original exp_reflections table, which would match (by
            # original miller indices) the miller array results of the postrefinement.
            match_original_indices = miller.match_multi_indices(
                miller_indices_unique=exp_miller_indices_original.indices(),
                miller_indices=result_observations_original_index.indices())
            exp_reflections_match_results = exp_reflections.select(
                match_original_indices.pairs().column(0))
            assert (exp_reflections_match_results['intensity.sum.value'] ==
                    result_observations_original_index.data()
                    ).count(False) == 0
            new_exp_reflections[
                'intensity.sum.value.unmodified'] = exp_reflections_match_results[
                    'intensity.sum.value.unmodified']
            new_exp_reflections[
                'intensity.sum.variance.unmodified'] = exp_reflections_match_results[
                    'intensity.sum.variance.unmodified']

            new_reflections.extend(new_exp_reflections)

    # report rejected experiments, reflections
    experiments_rejected_by_postrefinement = len(experiments) - len(
        new_experiments)
    reflections_rejected_by_postrefinement = reflections.size(
    ) - new_reflections.size()

    self.logger.log("Experiments rejected by post-refinement: %d" %
                    experiments_rejected_by_postrefinement)
    self.logger.log("Reflections rejected by post-refinement: %d" %
                    reflections_rejected_by_postrefinement)

    all_reasons = []
    for reason, count in six.iteritems(experiments_rejected_by_reason):
        self.logger.log("Experiments rejected due to %s: %d" %
                        (reason, count))
        all_reasons.append(reason)

    comm = self.mpi_helper.comm
    MPI = self.mpi_helper.MPI

    # Collect all rejection reasons from all ranks. Use allreduce to let each rank have all reasons.
    all_reasons = comm.allreduce(all_reasons, MPI.SUM)
    # The per-reason reductions below are collective calls, so every rank must
    # visit the reasons in the SAME order. Iterating a bare set of strings does
    # not guarantee that (string hashing is randomised per process), hence the
    # explicit sort.
    all_reasons = sorted(set(all_reasons))

    # Now that each rank has all reasons from all ranks, we can treat the reasons in a uniform way.
    total_experiments_rejected_by_reason = {}
    for reason in all_reasons:
        rejected_experiment_count = experiments_rejected_by_reason.get(
            reason, 0)
        total_experiments_rejected_by_reason[reason] = comm.reduce(
            rejected_experiment_count, MPI.SUM, 0)

    total_accepted_experiment_count = comm.reduce(len(new_experiments),
                                                  MPI.SUM, 0)

    # how many reflections have we rejected due to post-refinement?
    rejected_reflections = len(reflections) - len(new_reflections)
    total_rejected_reflections = self.mpi_helper.sum(rejected_reflections)

    if self.mpi_helper.rank == 0:
        for reason, count in six.iteritems(
                total_experiments_rejected_by_reason):
            self.logger.main_log(
                "Total experiments rejected due to %s: %d" % (reason, count))
        self.logger.main_log("Total experiments accepted: %d" %
                             total_accepted_experiment_count)
        self.logger.main_log(
            "Total reflections rejected due to post-refinement: %d" %
            total_rejected_reflections)

    self.logger.log_step_time("POSTREFINEMENT", True)

    # Do we have any data left?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(new_experiments, new_reflections)

    return new_experiments, new_reflections
def run(self, input_experiments, input_reflections):
    """Resolve the indexing ambiguity across all ranks and reindex the data.

    Visible sequence of steps:

    1. Gather per-rank experiment counts on rank 0, build a redistribution
       plan there, broadcast it and exchange (experiments, reflections)
       tokens with ``apply_all_to_all``.
    2. On every rank that received tokens, run ``self.task_c(...)`` and assign
       each experiment to a coset of the lattice-group / space-group
       decomposition; gather the per-rank tables on rank 0.
    3. On rank 0, merge the reports with ``reconcile_cosym_reports`` and, when
       ``params.modify.cosym.anchor`` is set, reconcile them with an anchor.
    4. Broadcast the merged table, keep only experiments listed in it, change
       the crystal basis for non-zero cosets and reindex the reflections with
       ``reindex_refl_by_coset``.

    Many calls here are MPI collectives (gather/bcast/allgather/barrier); all
    ranks must execute them in the same order.

    Args:
        input_experiments: experiments held by this rank.
        input_reflections: reflections held by this rank (needs 'exp_id').

    Returns:
        Tuple ``(result_experiments_for_cosym, selected_reflections)``.
    """
    from collections import OrderedDict
    if self.mpi_helper.rank == 0:
        print("Starting cosym worker")
        #Overall = Profiler("Cosym total time")

    # Evenly distribute all experiments from mpi_helper ranks
    reports = self.mpi_helper.comm.gather(
        (len(input_experiments)),
        root=0)  # report from all ranks on experiment count
    if self.mpi_helper.rank == 0:
        from xfel.merging.application.modify.token_passing_left_right import construct_src_to_dst_plan
        plan = construct_src_to_dst_plan(
            flex.int(reports), self.params.modify.cosym.tranch_size,
            self.mpi_helper.comm)
    else:
        plan = 0  # placeholder, replaced by the broadcast below
    plan = self.mpi_helper.comm.bcast(plan, root=0)
    dst_offset = 1 if self.mpi_helper.size > 1 else 0  # decision whether to reserve rank 0 for parallel anchor determination
    # FIXME XXX probably need to look at plan size to decide dst_offset or not

    from xfel.merging.application.modify.token_passing_left_right import apply_all_to_all
    tokens = apply_all_to_all(plan=plan,
                              dst_offset=dst_offset,
                              value=(input_experiments, input_reflections),
                              comm=self.mpi_helper.comm)

    if self.params.modify.cosym.anchor:
        # The anchor plan still uses the per-rank experiment counts held in
        # `reports`; `reports` is rebound to the cosym results further down.
        if self.mpi_helper.rank == 0:
            MIN_ANCHOR = 20
            from xfel.merging.application.modify.token_passing_left_right import construct_anchor_src_to_dst_plan
            anchor_plan = construct_anchor_src_to_dst_plan(
                MIN_ANCHOR, flex.int(reports),
                self.params.modify.cosym.tranch_size, self.mpi_helper.comm)
        else:
            anchor_plan = 0  # placeholder, replaced by the broadcast below
        anchor_plan = self.mpi_helper.comm.bcast(anchor_plan, root=0)

    self.logger.log_step_time("COSYM")

    if self.params.modify.cosym.plot.interactive:
        self.params.modify.cosym.plot.filename = None

    # Only the first plot.n_max ranks that hold tokens may plot.
    has_tokens = len(tokens) > 0
    all_has_tokens = self.mpi_helper.comm.allgather(has_tokens)
    ranks_with_tokens = [
        i for (i, val) in enumerate(all_has_tokens) if val
    ]
    ranks_to_plot = ranks_with_tokens[:self.params.modify.cosym.plot.n_max]
    do_plot = (self.params.modify.cosym.plot.do_plot
               and self.mpi_helper.rank in ranks_to_plot)

    if len(
            tokens
    ) > 0:  # Only select ranks that have been assigned tranch data, for mutual coset determination
        # because cosym has a problem with hashed identifiers, use simple experiment identifiers
        sampling_experiments_for_cosym = ExperimentList()
        sampling_reflections_for_cosym = [
        ]  # is a list of flex.reflection_table

        COSYM = self.task_c(self.params,
                            self.mpi_helper,
                            self.logger,
                            tokens,
                            sampling_experiments_for_cosym,
                            sampling_reflections_for_cosym,
                            communicator_size=self.mpi_helper.size,
                            do_plot=do_plot)
        self.uuid_cache = COSYM.uuid_cache  # reformed uuid list after n_refls filter

        rank_N_refl = flex.double([r.size() for r in COSYM.reflections])
        message = """Task 1. Prepare the data for cosym change_of_basis_ops_to_minimum_cell eliminate_sys_absent transform models into Miller arrays, putting data in primitive triclinic reduced cell There are %d experiments with %d reflections, averaging %.1f reflections/experiment""" % (
            len(COSYM.experiments), flex.sum(rank_N_refl),
            flex.mean(rank_N_refl))
        self.logger.log(message)
        if self.mpi_helper.rank == 1:
            print(message)  #; P = Timer("COSYM.run")
        COSYM.run()
        #if self.mpi_helper.rank == 1: del P

        # Column-wise accumulator for this rank's result table.
        keyval = [("experiment", []), ("reindex_op", []), ("coset", [])]
        raw = OrderedDict(keyval)

        if self.mpi_helper.rank == 0:
            print("Rank", self.mpi_helper.rank, "experiments:",
                  len(sampling_experiments_for_cosym))

        for sidx in range(len(self.uuid_cache)):
            raw["experiment"].append(self.uuid_cache[sidx])

            sidx_plus = sidx

            try:
                minimum_to_input = COSYM.cb_op_to_minimum[
                    sidx_plus].inverse()
            except Exception as e:
                # Print diagnostics, then let the original error propagate.
                print("raising", e, sidx_plus, len(COSYM.cb_op_to_minimum))
                raise e

            # Reindexing op expressed in the input setting:
            # (minimum -> input) * cosym op * (input -> minimum).
            reindex_op = minimum_to_input * \
                sgtbx.change_of_basis_op(COSYM.cosym_analysis.reindexing_ops[sidx_plus]) * \
                COSYM.cb_op_to_minimum[sidx_plus]

            # Keep this block even though not currently used; need for future assertions:
            LG = COSYM.cosym_analysis.target._lattice_group
            LGINP = LG.change_basis(
                COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(
                    minimum_to_input)
            SG = COSYM.cosym_analysis.input_space_group
            SGINP = SG.change_basis(
                COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(
                    minimum_to_input)
            CO = sgtbx.cosets.left_decomposition(LGINP, SGINP)
            partitions = CO.partitions
            this_reindex_op = reindex_op.as_hkl()
            # Find the coset whose operators include this reindexing op.
            this_coset = None
            for p_no, partition in enumerate(partitions):
                partition_ops = [
                    change_of_basis_op(ip).as_hkl() for ip in partition
                ]
                if this_reindex_op in partition_ops:
                    this_coset = p_no
                    break
            assert this_coset is not None
            raw["coset"].append(this_coset)
            raw["reindex_op"].append(this_reindex_op)

        # NOTE(review): `keys` is not used afterwards in this method.
        keys = list(raw.keys())
        from pandas import DataFrame as df
        data = df(raw)
        # major assumption is that all the coset decompositions "CO" are the same. NOT sure if a test is needed.
        # NOTE(review): CO is only bound inside the loop above; an empty
        # self.uuid_cache would leave it undefined here.
        reports = self.mpi_helper.comm.gather((data, CO), root=0)
    else:
        # Ranks without tokens still take part in the collective gather.
        reports = self.mpi_helper.comm.gather(None, root=0)
    if self.mpi_helper.rank == 0:
        # report back to rank==0 and reconcile all coset assignments
        while None in reports:
            reports.pop(reports.index(None))
        # global CO
        # NOTE(review): reports[0] fails if no rank contributed a report.
        global_coset_decomposition = reports[0][
            1]  # again, assuming here they are all the same XXX
    else:
        global_coset_decomposition = 0  # placeholder, replaced by the broadcast below
    global_coset_decomposition = self.mpi_helper.comm.bcast(
        global_coset_decomposition, root=0)
    partitions = global_coset_decomposition.partitions
    self.mpi_helper.comm.barrier()
    # end of distributed embedding

    if self.params.modify.cosym.anchor:
        anchor_tokens = apply_all_to_all(plan=anchor_plan,
                                         dst_offset=0,
                                         value=(input_experiments,
                                                input_reflections),
                                         comm=self.mpi_helper.comm)

    if self.mpi_helper.rank == 0:
        from xfel.merging.application.modify.df_cosym import reconcile_cosym_reports
        REC = reconcile_cosym_reports(reports)
        results = REC.composite_tranch_merge(voting_method="consensus")

        # at this point we have the opportunity to reconcile the results with an anchor
        # recycle the data structures for anchor determination
        if self.params.modify.cosym.anchor:
            sampling_experiments_for_cosym, sampling_reflections_for_cosym = self.task_a(
                self.params)
            ANCHOR = self.task_c(
                self.params,
                self.mpi_helper,
                self.logger,
                anchor_tokens,
                sampling_experiments_for_cosym,
                sampling_reflections_for_cosym,
                uuid_starting=["anchor structure"],
                communicator_size=1)  # only run on the rank==0 tranch.
            self.uuid_cache = ANCHOR.uuid_cache  # reformed uuid list after n_refls filter
            #P = Timer("ANCHOR.run")
            ANCHOR.run(
            )  # Future redesign XXX FIXME do this in rank 0 in parallel with distributed composite tranches
            #del P

            keyval = [("experiment", []), ("coset", [])]
            raw = OrderedDict(keyval)
            print("Anchor", "experiments:",
                  len(sampling_experiments_for_cosym))

            # Index 0 is used for the anchor itself (see the
            # "anchor structure" row appended below).
            anchor_op = ANCHOR.cb_op_to_minimum[0].inverse() * \
                sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[0]) * \
                ANCHOR.cb_op_to_minimum[0]
            anchor_coset = None
            for p_no, partition in enumerate(partitions):
                partition_ops = [
                    change_of_basis_op(ip).as_hkl() for ip in partition
                ]
                if anchor_op.as_hkl() in partition_ops:
                    anchor_coset = p_no
                    break
            assert anchor_coset is not None
            print("The consensus for the anchor is", anchor_op.as_hkl(),
                  " anchor coset", anchor_coset)

            raw["experiment"].append("anchor structure")
            raw["coset"].append(anchor_coset)
            # Remaining entries (index 1 onward) are the experiments compared
            # against the anchor.
            for sidx in range(1, len(self.uuid_cache)):
                raw["experiment"].append(self.uuid_cache[sidx])

                sidx_plus = sidx

                minimum_to_input = ANCHOR.cb_op_to_minimum[
                    sidx_plus].inverse()
                reindex_op = minimum_to_input * \
                    sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[sidx_plus]) * \
                    ANCHOR.cb_op_to_minimum[sidx_plus]
                this_reindex_op = reindex_op.as_hkl()
                this_coset = None
                for p_no, partition in enumerate(partitions):
                    partition_ops = [
                        change_of_basis_op(ip).as_hkl() for ip in partition
                    ]
                    if this_reindex_op in partition_ops:
                        this_coset = p_no
                        break
                assert this_coset is not None
                raw["coset"].append(this_coset)

            from pandas import DataFrame as df
            anchor_data = df(raw)
            REC.reconcile_with_anchor(results, anchor_data, anchor_op)
            # no need for return value; results dataframe is modified in place

        if self.params.modify.cosym.dataframe:
            import os
            results.to_pickle(
                path=os.path.join(self.params.output.output_dir,
                                  self.params.modify.cosym.dataframe))
        transmitted = results
    else:
        transmitted = 0  # placeholder, replaced by the broadcast below
    self.mpi_helper.comm.barrier()
    transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
    # "transmitted" holds the global coset assignments

    #subselect expt and refl on the successful coset assignments
    # output: experiments-->result_experiments_for_cosym; reflections-->reflections (modified in place)
    result_experiments_for_cosym = ExperimentList()
    good_refls = flex.bool(len(input_reflections), False)
    good_expt_id = list(transmitted["experiment"])
    good_coset = list(
        transmitted["coset"]
    )  # would like to understand how to use pandas rather than Python list
    for iexpt in range(len(input_experiments)):
        iexpt_id = input_experiments[iexpt].identifier
        keepit = iexpt_id in good_expt_id
        if keepit:
            this_coset = good_coset[good_expt_id.index(iexpt_id)]
            # The first operator of the partition is used as the coset's
            # change-of-basis op.
            this_cb_op = change_of_basis_op(
                global_coset_decomposition.partitions[this_coset][0])
            accepted_expt = input_experiments[iexpt]
            # Coset 0 is left as is; only non-zero cosets are re-based.
            if this_coset > 0:
                accepted_expt.crystal = MosaicCrystalSauter2014(
                    accepted_expt.crystal.change_basis(this_cb_op))
                # need to use wrapper because of cctbx/dxtbx#5
            result_experiments_for_cosym.append(accepted_expt)
            good_refls |= input_reflections["exp_id"] == iexpt_id
    selected_reflections = input_reflections.select(
        good_refls)  # XXX is this in place (double check)
    self.mpi_helper.comm.barrier()

    # still have to reindex the reflection table, but try to do it efficiently
    from xfel.merging.application.modify.reindex_cosym import reindex_refl_by_coset
    if (len(result_experiments_for_cosym) > 0):
        reindex_refl_by_coset(
            refl=selected_reflections,
            data=transmitted,
            symms=[
                E.crystal.get_crystal_symmetry()
                for E in result_experiments_for_cosym
            ],
            uuids=[E.identifier for E in result_experiments_for_cosym],
            co=global_coset_decomposition,
            anomalous_flag=self.params.merging.merge_anomalous == False,
            verbose=False)
        # this should have re-indexed the refls in place, no need for return value

    self.mpi_helper.comm.barrier()
    # Note: this handles the simple case of lattice ambiguity (P63 in P/mmm lattice group)
    # in this use case we assume all inputs and outputs are in P63.
    # more complex use cases would have to reset the space group in the crystal, and recalculate
    # the ASU "miller_indicies" in the reflections table.

    self.logger.log_step_time("COSYM", True)
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(result_experiments_for_cosym,
                                    selected_reflections)
    return result_experiments_for_cosym, selected_reflections
def run(self, experiments, reflections):
    """Reject experiments whose space group or unit cell fails the filter.

    Nothing is done (the inputs are returned untouched) unless 'unit_cell' is
    listed in ``params.filter.algorithm``.  Otherwise each experiment is tested
    with ``check_space_group`` and, only if that passes, ``check_unit_cell``.
    Failing experiments are removed together with their reflections, the
    per-rank counts are logged, and the counts are summed onto rank 0, which
    writes the totals to the main log.

    Returns:
        The (experiments, reflections) pair that survived the filter.
    """
    if 'unit_cell' not in self.params.filter.algorithm:
        # so far only "unit_cell" algorithm is supported
        return experiments, reflections

    self.logger.log_step_time("FILTER_EXPERIMENTS")

    # If the filter unit cell and/or space group params are Auto, use the
    # corresponding scaling targets.
    targets = self.params.filter.unit_cell.value
    if targets.target_unit_cell == Auto:
        targets.target_unit_cell = self.params.scaling.unit_cell
    if targets.target_space_group == Auto:
        targets.target_space_group = self.params.scaling.space_group

    self.logger.log(
        "Using filter target unit cell: %s" % str(targets.target_unit_cell))
    self.logger.log(
        "Using filter target space group: %s" % str(targets.target_space_group))

    # The space group is checked first; the unit cell is only checked for
    # experiments whose space group was accepted.
    rejected_ids = []
    n_bad_space_group = 0
    n_bad_unit_cell = 0
    for expt in experiments:
        if not self.check_space_group(expt):
            n_bad_space_group += 1
        elif not self.check_unit_cell(expt):
            n_bad_unit_cell += 1
        else:
            continue
        rejected_ids.append(expt.identifier)

    kept_experiments, kept_reflections = experiment_filter.remove_experiments(
        experiments, reflections, rejected_ids)
    n_reflections_lost = len(reflections) - len(kept_reflections)

    # Every rejected experiment must actually have been removed.
    assert n_bad_space_group + n_bad_unit_cell == len(experiments) - len(
        kept_experiments)

    self.logger.log(
        "Experiments rejected because of unit cell dimensions: %d" %
        n_bad_unit_cell)
    self.logger.log(
        "Experiments rejected because of space group %d" % n_bad_space_group)
    self.logger.log(
        "Reflections rejected because of rejected experiments: %d" %
        n_reflections_lost)

    # MPI-reduce total counts (collective calls: every rank must get here)
    comm = self.mpi_helper.comm
    sum_op = self.mpi_helper.MPI.SUM
    total_bad_unit_cell = comm.reduce(n_bad_unit_cell, sum_op, 0)
    total_bad_space_group = comm.reduce(n_bad_space_group, sum_op, 0)
    total_reflections_lost = comm.reduce(n_reflections_lost, sum_op, 0)

    # rank 0: log total counts
    if self.mpi_helper.rank == 0:
        self.logger.main_log(
            "Total experiments rejected because of unit cell dimensions: %d" %
            total_bad_unit_cell)
        self.logger.main_log(
            "Total experiments rejected because of space group %d" %
            total_bad_space_group)
        self.logger.main_log(
            "Total reflections rejected because of rejected experiments %d" %
            total_reflections_lost)

    self.logger.log_step_time("FILTER_EXPERIMENTS", True)

    # Do we have any data left?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(kept_experiments, kept_reflections)

    return kept_experiments, kept_reflections
def run(self, experiments, reflections):
    """Filter experiments by unit cell, by target value or by cluster.

    Nothing is done (the inputs are returned untouched) unless 'unit_cell' is
    listed in ``params.filter.algorithm``.  Depending on
    ``params.filter.unit_cell.algorithm``:

    * "value": each experiment is tested with ``check_space_group`` and, if
      that passes, ``check_unit_cell``.
    * "cluster": rank 0 unpickles the cluster description named by
      ``filter.unit_cell.cluster.covariance.file`` and broadcasts it; each
      experiment is then tested with ``check_cluster``.

    Rejected experiments and their reflections are removed, per-rank counts
    are logged, and totals are reduced onto rank 0 for the main log.

    Returns:
        The (experiments, reflections) pair that survived the filter.
    """
    if 'unit_cell' not in self.params.filter.algorithm:
        # so far only "unit_cell" algorithm is supported
        return experiments, reflections

    self.logger.log_step_time("FILTER_EXPERIMENTS")

    rejected_ids = []
    n_bad_unit_cell = 0
    n_bad_space_group = 0
    algorithm = self.params.filter.unit_cell.algorithm

    # BEGIN BY-VALUE FILTER
    if algorithm == "value":
        by_value = self.params.filter.unit_cell.value
        # If the filter unit cell and/or space group params are Auto, use the
        # corresponding scaling targets.
        if by_value.target_unit_cell == Auto:
            if self.params.scaling.unit_cell is not None:
                by_value.target_unit_cell = self.params.scaling.unit_cell
            else:
                try:
                    by_value.target_unit_cell = self.params.statistics.average_unit_cell
                except AttributeError:
                    # no average unit cell to fall back on; target is left as is
                    pass
        if by_value.target_space_group == Auto:
            by_value.target_space_group = self.params.scaling.space_group

        self.logger.log("Using filter target unit cell: %s"%str(by_value.target_unit_cell))
        self.logger.log("Using filter target space group: %s"%str(by_value.target_space_group))

        for expt in experiments:
            if not self.check_space_group(expt):
                n_bad_space_group += 1
            elif not self.check_unit_cell(expt):
                n_bad_unit_cell += 1
            else:
                continue
            rejected_ids.append(expt.identifier)
    # END BY-VALUE FILTER
    elif algorithm == "cluster":
        from uc_metrics.clustering.util import get_population_permutation  # implicit import
        import pickle

        class Empty:
            pass

        if self.mpi_helper.rank != 0:
            transmitted = None
        else:
            covariance = self.params.filter.unit_cell.cluster.covariance
            # NOTE(review): pickle.load executes whatever the file contains;
            # the covariance file must come from a trusted source.
            with open(covariance.file,'rb') as F:
                data = pickle.load(F)

            feature_vectors = Empty()
            feature_vectors.features_ = data["features"]
            feature_vectors.sample_name = data["sample"]
            feature_vectors.output_info = data["info"]
            pop = data["populations"]

            self.logger.main_log("Focusing on cluster component %d from previous analysis of %d cells"%(
                covariance.component, len(pop.labels)))
            self.logger.main_log("%s noise %d order %s"%(pop.populations, pop.n_noise_, pop.main_components))
            legend = pop.basic_covariance_compact_report(feature_vectors=feature_vectors).getvalue()
            self.logger.main_log(legend)
            self.logger.main_log("Applying Mahalanobis cutoff of %.3f"%(covariance.mahalanobis))
            transmitted = data

        # distribute cluster information to all ranks
        self.cluster_data = self.mpi_helper.comm.bcast(transmitted, root=0)

        # pull out the index numbers of the unit cell parameters to be used
        # for covariance matrix
        cell_parameter_names = ["a","b","c","alpha","beta","gamma"]
        self.cluster_data["idxs"] = [
            cell_parameter_names.index(feature)
            for feature in self.cluster_data["features"]
        ]

        for expt in experiments:
            if self.check_cluster(expt):
                continue
            rejected_ids.append(expt.identifier)
            n_bad_unit_cell += 1
    # END OF COVARIANCE FILTER

    kept_experiments, kept_reflections = experiment_filter.remove_experiments(
        experiments, reflections, rejected_ids)
    n_reflections_lost = len(reflections) - len(kept_reflections)

    # Every rejected experiment must actually have been removed.
    assert n_bad_space_group + n_bad_unit_cell == len(experiments) - len(kept_experiments)

    self.logger.log("Experiments rejected because of unit cell dimensions: %d"%n_bad_unit_cell)
    self.logger.log("Experiments rejected because of space group %d"%n_bad_space_group)
    self.logger.log("Reflections rejected because of rejected experiments: %d"%n_reflections_lost)

    # MPI-reduce total counts (collective calls: every rank must get here)
    comm = self.mpi_helper.comm
    sum_op = self.mpi_helper.MPI.SUM
    total_bad_unit_cell = comm.reduce(n_bad_unit_cell, sum_op, 0)
    total_bad_space_group = comm.reduce(n_bad_space_group, sum_op, 0)
    total_reflections_lost = comm.reduce(n_reflections_lost, sum_op, 0)

    # rank 0: log total counts
    if self.mpi_helper.rank == 0:
        self.logger.main_log("Total experiments rejected because of unit cell dimensions: %d"%total_bad_unit_cell)
        self.logger.main_log("Total experiments rejected because of space group %d"%total_bad_space_group)
        self.logger.main_log("Total reflections rejected because of rejected experiments %d"%total_reflections_lost)

    self.logger.log_step_time("FILTER_EXPERIMENTS", True)

    # Do we have any data left?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(kept_experiments, kept_reflections)

    return kept_experiments, kept_reflections
def run(self, all_experiments, all_reflections):
    """Load this rank's share of the input files and append it to the inputs.

    Rank 0 builds the list of experiment/reflection file pairs, splits it
    into one list per rank and broadcasts it.  With
    ``output.expanded_bookkeeping`` it also writes "file_list_map.json" to
    the output directory and broadcasts the reverse (file pair -> index)
    mapping.  Every rank then reads its own file pairs, renumbers the
    reflection 'id' column to the experiment's position in
    ``all_experiments``, adds an 'exp_id' column holding the experiment
    identifier, and finally prunes the reflection table keys.

    Args:
        all_experiments: experiments to extend, or None to start empty.
        all_reflections: reflection table to extend, or None to start empty.
            Both arguments must be None, or neither.

    Returns:
        The (all_experiments, all_reflections) pair including the new data.
    """
    from dxtbx.model.experiment_list import ExperimentList
    from dials.array_family import flex

    # Both must be none or not none
    n_missing = (all_experiments is None) + (all_reflections is None)
    assert n_missing in [0, 2]
    if n_missing == 0:
        starting_expts_count = len(all_experiments)
        starting_refls_count = len(all_reflections)
    else:
        all_experiments = ExperimentList()
        all_reflections = flex.reflection_table()
        starting_expts_count = starting_refls_count = 0

    self.logger.log(
        "Initial number of experiments: %d; Initial number of reflections: %d"
        % (starting_expts_count, starting_refls_count))

    bookkeeping = self.params.output.expanded_bookkeeping
    my_rank = self.mpi_helper.rank

    # Generate and send a list of file paths to each worker
    if my_rank != 0:
        transmitted = None
    else:
        file_list = self.get_list()
        self.logger.log("Built an input list of %d json/pickle file pairs" %
                        (len(file_list)))
        self.params.input.path = None  # Rank 0 has already parsed the input parameters

        # optionally write a file list mapping to disk, useful in post
        # processing if save_experiments_and_reflections=True
        file_id_from_names = None
        if bookkeeping:
            absolute_pairs = [
                tuple(os.path.abspath(name) for name in exp_ref_pair)
                for exp_ref_pair in file_list
            ]
            file_names_from_id = dict(enumerate(absolute_pairs))
            map_path = os.path.join(self.params.output.output_dir,
                                    "file_list_map.json")
            with open(map_path, "w") as o:
                json.dump(file_names_from_id, o)
            file_id_from_names = {
                pair: i_f for i_f, pair in enumerate(absolute_pairs)
            }

        calculator = file_load_calculator(self.params, file_list, self.logger)
        per_rank_file_list = calculator.calculate_file_load(
            available_rank_count=self.mpi_helper.size)
        self.logger.log(
            'Transmitting a list of %d lists of json/pickle file pairs' %
            (len(per_rank_file_list)))
        transmitted = per_rank_file_list, file_id_from_names

    self.logger.log_step_time("BROADCAST_FILE_LIST")
    per_rank_lists, file_names_mapping = self.mpi_helper.comm.bcast(
        transmitted, root=0)
    # Ranks beyond the number of lists have nothing to read.
    if my_rank < len(per_rank_lists):
        my_file_list = per_rank_lists[my_rank]
    else:
        my_file_list = None
    self.logger.log_step_time("BROADCAST_FILE_LIST", True)

    # Load the data
    self.logger.log_step_time("LOAD")
    if my_file_list is None:
        self.logger.log("Received a list of 0 json/pickle file pairs")
    else:
        self.logger.log("Received a list of %d json/pickle file pairs" %
                        len(my_file_list))
        for experiments_filename, reflections_filename in my_file_list:
            self.logger.log("Reading %s %s" %
                            (experiments_filename, reflections_filename))
            experiments = ExperimentListFactory.from_json_file(
                experiments_filename,
                check_format=self.params.input.read_image_headers)
            reflections = flex.reflection_table.from_file(reflections_filename)
            n_refl = len(reflections)

            if bookkeeping:
                # NOTE: these are un-prunable
                reflections["input_refl_index"] = flex.int(list(range(n_refl)))
                reflections["orig_exp_id"] = reflections['id']
                assert file_names_mapping is not None
                exp_ref_pair = (os.path.abspath(experiments_filename),
                                os.path.abspath(reflections_filename))
                file_index = file_names_mapping[exp_ref_pair]
                reflections["file_list_mapping"] = flex.int(
                    [file_index] * n_refl)
            self.logger.log("Data read, prepping")

            # Keep untouched copies of the summed intensities and variances.
            for column in ('intensity.sum.value', 'intensity.sum.variance'):
                if column in reflections:
                    reflections[column + '.unmodified'] = reflections[column] * 1

            new_ids = flex.int(n_refl, -1)
            new_identifiers = flex.std_string(n_refl)

            # Empty the table's id -> identifier map; it is refilled below.
            eid = reflections.experiment_identifiers()
            for k in eid.keys():
                del eid[k]

            if bookkeeping:
                preGen_experiment_identifiers(experiments, experiments_filename)

            for experiment_id, experiment in enumerate(experiments):
                # select reflections of the current experiment
                refls_sel = reflections['id'] == experiment_id
                if refls_sel.count(True) == 0:
                    continue

                if experiment.identifier is None or len(
                        experiment.identifier) == 0:
                    experiment.identifier = create_experiment_identifier(
                        experiment, experiments_filename, experiment_id)

                if not self.params.input.keep_imagesets:
                    experiment.imageset = None

                all_experiments.append(experiment)
                new_id = len(all_experiments) - 1

                # Reflection experiment 'id' is unique within this rank;
                # 'exp_id' (i.e. experiment identifier) is unique globally
                new_identifiers.set_selected(refls_sel, experiment.identifier)
                eid[new_id] = experiment.identifier
                new_ids.set_selected(refls_sel, new_id)

            assert (new_ids <
                    0).count(True) == 0, "Not all reflections accounted for"
            reflections['id'] = new_ids
            reflections['exp_id'] = new_identifiers
            all_reflections.extend(reflections)
    self.logger.log_step_time("LOAD", True)

    self.logger.log('Read %d experiments consisting of %d reflections' %
                    (len(all_experiments) - starting_expts_count,
                     len(all_reflections) - starting_refls_count))
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    all_reflections = self.prune_reflection_table_keys(all_reflections)

    # Do we have any data?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(all_experiments, all_reflections)

    return all_experiments, all_reflections
def run(self, all_experiments, all_reflections):
    """Load this rank's share of the input files and append it to the inputs.

    Rank 0 builds the list of experiment/reflection file pairs, splits it
    into one list per rank and broadcasts it.  Every rank reads its own file
    pairs, gives each experiment an identifier if it has none, tags the
    experiment's reflections with an 'exp_id' column holding that identifier,
    and finally prunes the reflection table down to a fixed set of columns.

    Args:
        all_experiments: experiments to extend, or None to start empty.
        all_reflections: reflection table to extend, or None to start empty.
            Both arguments must be None, or neither.

    Returns:
        The (all_experiments, all_reflections) pair including the new data.
    """
    from dxtbx.model.experiment_list import ExperimentList
    from dials.array_family import flex

    # Both must be none or not none
    n_missing = (all_experiments is None) + (all_reflections is None)
    assert n_missing in [0, 2]
    if n_missing == 0:
        starting_expts_count = len(all_experiments)
        starting_refls_count = len(all_reflections)
    else:
        all_experiments = ExperimentList()
        all_reflections = flex.reflection_table()
        starting_expts_count = starting_refls_count = 0

    self.logger.log(
        "Initial number of experiments: %d; Initial number of reflections: %d"
        % (starting_expts_count, starting_refls_count))

    my_rank = self.mpi_helper.rank

    # Generate and send a list of file paths to each worker
    if my_rank != 0:
        transmitted = None
    else:
        file_list = self.get_list()
        self.logger.log("Built an input list of %d json/pickle file pairs" %
                        (len(file_list)))
        self.params.input.path = None  # Rank 0 has already parsed the input parameters
        calculator = file_load_calculator(self.params, file_list, self.logger)
        per_rank_file_list = calculator.calculate_file_load(
            available_rank_count=self.mpi_helper.size)
        self.logger.log(
            'Transmitting a list of %d lists of json/pickle file pairs' %
            (len(per_rank_file_list)))
        transmitted = per_rank_file_list

    self.logger.log_step_time("BROADCAST_FILE_LIST")
    per_rank_lists = self.mpi_helper.comm.bcast(transmitted, root=0)
    # Ranks beyond the number of lists have nothing to read.
    if my_rank < len(per_rank_lists):
        my_file_list = per_rank_lists[my_rank]
    else:
        my_file_list = None
    self.logger.log_step_time("BROADCAST_FILE_LIST", True)

    # Load the data
    self.logger.log_step_time("LOAD")
    if my_file_list is None:
        self.logger.log("Received a list of 0 json/pickle file pairs")
    else:
        self.logger.log("Received a list of %d json/pickle file pairs" %
                        len(my_file_list))
        for experiments_filename, reflections_filename in my_file_list:
            experiments = ExperimentListFactory.from_json_file(
                experiments_filename, check_format=False)
            reflections = flex.reflection_table.from_file(reflections_filename)

            for experiment_id, experiment in enumerate(experiments):
                if experiment.identifier is None or len(
                        experiment.identifier) == 0:
                    experiment.identifier = create_experiment_identifier(
                        experiment, experiments_filename, experiment_id)
                all_experiments.append(experiment)

                # select reflections of the current experiment
                belongs_here = reflections['id'] == experiment_id
                refls = reflections.select(belongs_here)

                # Reflection experiment 'id' is supposed to be unique within
                # this rank; 'exp_id' (i.e. experiment identifier) is supposed
                # to be unique globally.  Only 'exp_id' is written here.
                refls['exp_id'] = flex.std_string(len(refls),
                                                  experiment.identifier)
                all_reflections.extend(refls)
    self.logger.log_step_time("LOAD", True)

    self.logger.log('Read %d experiments consisting of %d reflections' %
                    (len(all_experiments) - starting_expts_count,
                     len(all_reflections) - starting_refls_count))
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    # Drop every reflection column that is not listed here.
    from xfel.merging.application.reflection_table_utils import reflection_table_utils
    columns_to_keep = [
        'intensity.sum.value', 'intensity.sum.variance', 'miller_index',
        'miller_index_asymmetric', 'exp_id', 's1'
    ]
    all_reflections = reflection_table_utils.prune_reflection_table_keys(
        reflections=all_reflections, keys_to_keep=columns_to_keep)
    self.logger.log("Pruned reflection table")
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    # Do we have any data?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(all_experiments, all_reflections)

    return all_experiments, all_reflections
def run(self, experiments, reflections):
    """Resolve the indexing ambiguity with cosym and reindex onto one setting.

    Steps, in order (every rank must reach each MPI collective):

    1. ``task_1`` wraps the experiments in the cosym wrapper: all of this
       rank's experiments on a single rank, otherwise the tranches returned
       by ``token_passing_left_right``.  The wrapper is then run.
    2. Each experiment's reindexing operator is matched to a coset of the
       left decomposition of the lattice group by the space group.
    3. The per-rank tables are gathered on rank 0 and merged by consensus.
       If ``modify.cosym.anchor`` is set, rank 0 runs a second cosym pass
       that includes the reference model built by ``task_a`` and reconciles
       the merged table with it.  If ``modify.cosym.dataframe`` is set the
       table is pickled into the output directory.
    4. The table is broadcast.  Experiments that have a coset assignment are
       kept (crystals with coset > 0 are changed to the new basis) and their
       reflections are reindexed in place.

    Args:
        experiments: this rank's experiment list.
        reflections: this rank's reflection table (needs an 'exp_id' column).

    Returns:
        (accepted experiments, reflections belonging to them).

    Raises:
        AssertionError: when run on 2, 3 or 4 MPI ranks, or when a reindexing
            operator is not found in any coset.
    """
    assert self.mpi_helper.size not in [2,3,4], "Please run modify_cosym on " \
        "1 or >= 5 MPI ranks."

    self.logger.log_step_time("COSYM")
    all_sampling_experiments = experiments
    all_sampling_reflections = reflections
    # because cosym has a problem with hashed identifiers, use simple
    # experiment identifiers
    from dxtbx.model.experiment_list import ExperimentList
    sampling_experiments_for_cosym = ExperimentList()
    sampling_reflections_for_cosym = []  # is a list of flex.reflection_table

    def task_a():
        # Add the reference model ("anchor") as one more cosym experiment.
        if not self.params.modify.cosym.anchor:
            return
        from xfel.merging.application.model.crystal_model import crystal_model
        XM = crystal_model(params=self.params, purpose="cosym")
        model_intensities = XM.run([], [])
        from dxtbx.model import Experiment, Crystal
        from scitbx.matrix import sqr
        O = sqr(model_intensities.unit_cell().orthogonalization_matrix()
                ).transpose().elems
        real_a = (O[0], O[1], O[2])
        real_b = (O[3], O[4], O[5])
        real_c = (O[6], O[7], O[8])
        nc = Crystal(real_a, real_b, real_c, model_intensities.space_group())
        # The caller empties the cosym lists before calling task_a, so the
        # reference model ends up first in the cosym E-list.
        sampling_experiments_for_cosym.append(Experiment(crystal=nc))
        from dials.array_family import flex

        exp_reflections = flex.reflection_table()
        exp_reflections['intensity.sum.value'] = model_intensities.data()
        exp_reflections['intensity.sum.variance'] = flex.pow(
            model_intensities.sigmas(), 2)
        exp_reflections['miller_index'] = model_intensities.indices()
        exp_reflections['miller_index_asymmetric'] = model_intensities.indices()
        exp_reflections['flags'] = flex.size_t(
            model_intensities.size(),
            flex.reflection_table.flags.integrated_sum)

        # prepare individual reflection tables for each experiment
        simple_experiment_id = len(sampling_experiments_for_cosym) - 1
        # experiment identifier must be a string according to *.h file;
        # the identifier is changed on the _for_cosym Experiment list, not
        # the master experiments for through analysis
        sampling_experiments_for_cosym[-1].identifier = "%d" % simple_experiment_id
        # register the integer id as a new column in the per-experiment
        # reflection table
        exp_reflections['id'] = flex.int(len(exp_reflections),
                                         simple_experiment_id)
        # apparently the reflection table holds a map from integer id
        # (reflection table) to string id (experiment)
        exp_reflections.experiment_identifiers()[
            simple_experiment_id] = sampling_experiments_for_cosym[-1].identifier
        sampling_reflections_for_cosym.append(exp_reflections)

    # task_a is deliberately not called here: no anchor for the initial pass.

    def task_1(uuid_starting=None, mpi_helper_size=1, do_plot=False):
        # Fill the cosym lists and build the cosym wrapper.  A fresh list is
        # created when no starting uuids are given: a shared mutable default
        # would be aliased into self.uuid_cache and appended to.
        self.uuid_cache = [] if uuid_starting is None else uuid_starting
        if mpi_helper_size == 1:  # simple case, one rank
            for experiment in all_sampling_experiments:
                sampling_experiments_for_cosym.append(experiment)
                self.uuid_cache.append(experiment.identifier)

                exp_reflections = all_sampling_reflections.select(
                    all_sampling_reflections['exp_id'] == experiment.identifier)
                # prepare individual reflection tables for each experiment
                simple_experiment_id = len(sampling_experiments_for_cosym) - 1
                # experiment identifier must be a string according to *.h
                # file; the identifier is changed on the _for_cosym
                # Experiment list, not the master experiments
                sampling_experiments_for_cosym[-1].identifier = "%d" % simple_experiment_id
                # register the integer id as a new column in the
                # per-experiment reflection table
                exp_reflections['id'] = flex.int(len(exp_reflections),
                                                 simple_experiment_id)
                # the reflection table holds a map from integer id
                # (reflection table) to string id (experiment)
                exp_reflections.experiment_identifiers()[
                    simple_experiment_id] = sampling_experiments_for_cosym[-1].identifier
                sampling_reflections_for_cosym.append(exp_reflections)
        else:  # complex case, overlap tranches for mutual coset determination
            self.mpi_helper.MPI.COMM_WORLD.barrier()
            from xfel.merging.application.modify.token_passing_left_right import token_passing_left_right
            values = token_passing_left_right((experiments, reflections))
            for tranch_experiments, tranch_reflections in values:
                for experiment in tranch_experiments:
                    sampling_experiments_for_cosym.append(experiment)
                    self.uuid_cache.append(experiment.identifier)

                    exp_reflections = tranch_reflections.select(
                        tranch_reflections['exp_id'] == experiment.identifier)
                    # prepare individual reflection tables for each experiment
                    simple_experiment_id = len(
                        sampling_experiments_for_cosym) - 1
                    # experiment identifier must be a string according to
                    # *.h file; the identifier is changed on the _for_cosym
                    # Experiment list, not the master experiments
                    sampling_experiments_for_cosym[-1].identifier = "%d" % simple_experiment_id
                    # register the integer id as a new column in the
                    # per-experiment reflection table
                    exp_reflections['id'] = flex.int(len(exp_reflections),
                                                     simple_experiment_id)
                    # the reflection table holds a map from integer id
                    # (reflection table) to string id (experiment)
                    exp_reflections.experiment_identifiers()[
                        simple_experiment_id] = sampling_experiments_for_cosym[-1].identifier
                    sampling_reflections_for_cosym.append(exp_reflections)

        from dials.command_line import cosym as cosym_module
        cosym_module.logger = self.logger

        i_plot = self.mpi_helper.rank
        from xfel.merging.application.modify.aux_cosym import dials_cl_cosym_subclass as dials_cl_cosym_wrapper
        COSYM = dials_cl_cosym_wrapper(
            sampling_experiments_for_cosym,
            sampling_reflections_for_cosym,
            self.uuid_cache,
            params=self.params.modify.cosym,
            output_dir=self.params.output.output_dir,
            do_plot=do_plot,
            i_plot=i_plot)
        return COSYM

    if self.params.modify.cosym.plot.interactive:
        self.params.modify.cosym.plot.filename = None
    do_plot = (self.params.modify.cosym.plot.do_plot
               and self.mpi_helper.rank < self.params.modify.cosym.plot.n_max)
    COSYM = task_1(mpi_helper_size=self.mpi_helper.size, do_plot=do_plot)
    self.uuid_cache = COSYM.uuid_cache  # reformed uuid list after n_refls filter

    # Swap in the fast Rij target before cosym is run.
    import dials.algorithms.symmetry.cosym.target
    from xfel.merging.application.modify.aux_cosym import TargetWithFastRij
    dials.algorithms.symmetry.cosym.target.Target = TargetWithFastRij

    rank_N_refl = flex.double([r.size() for r in COSYM.reflections])
    message = """Task 1. Prepare the data for cosym change_of_basis_ops_to_minimum_cell eliminate_sys_absent transform models into Miller arrays, putting data in primitive triclinic reduced cell There are %d experiments with %d reflections, averaging %.1f reflections/experiment""" % (
        len(COSYM.experiments), flex.sum(rank_N_refl), flex.mean(rank_N_refl))
    self.logger.log(message)

    COSYM.run()

    from collections import OrderedDict
    keyval = [("experiment", []), ("reindex_op", []), ("coset", [])]
    raw = OrderedDict(keyval)
    print("Rank", self.mpi_helper.rank, "experiments:",
          len(sampling_experiments_for_cosym))

    for sidx in range(len(self.uuid_cache)):
        raw["experiment"].append(self.uuid_cache[sidx])

        sidx_plus = sidx

        minimum_to_input = COSYM.cb_op_to_minimum[sidx_plus].inverse()
        reindex_op = minimum_to_input * \
            sgtbx.change_of_basis_op(COSYM.cosym_analysis.reindexing_ops[sidx_plus]) * \
            COSYM.cb_op_to_minimum[sidx_plus]

        # Lattice group and space group expressed in the input setting; CO
        # and partitions from the last iteration are reused after the loop.
        LG = COSYM.cosym_analysis.target._lattice_group
        LGINP = LG.change_basis(
            COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(
                minimum_to_input)
        SG = COSYM.cosym_analysis.input_space_group
        SGINP = SG.change_basis(
            COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(
                minimum_to_input)
        CO = sgtbx.cosets.left_decomposition(LGINP, SGINP)
        partitions = CO.partitions

        # Find the coset whose operators include this reindexing operator.
        this_reindex_op = reindex_op.as_hkl()
        this_coset = None
        for p_no, partition in enumerate(partitions):
            partition_ops = [
                change_of_basis_op(ip).as_hkl() for ip in partition
            ]
            if this_reindex_op in partition_ops:
                this_coset = p_no
                break
        assert this_coset is not None
        raw["coset"].append(this_coset)
        raw["reindex_op"].append(this_reindex_op)

    from pandas import DataFrame as df
    data = df(raw)
    # major assumption is that all the coset decompositions "CO" are the
    # same.  NOT sure if a test is needed.

    # report back to rank==0 and reconcile all coset assignments
    reports = self.mpi_helper.comm.gather((data, CO), root=0)
    if self.mpi_helper.rank == 0:
        from xfel.merging.application.modify.df_cosym import reconcile_cosym_reports
        REC = reconcile_cosym_reports(reports)
        results = REC.simple_merge(voting_method="consensus")

        # at this point we have the opportunity to reconcile the results
        # with an anchor; recycle the data structures for anchor determination
        if self.params.modify.cosym.anchor:
            sampling_experiments_for_cosym = ExperimentList()
            sampling_reflections_for_cosym = []
            print("ANCHOR determination")
            task_a()
            ANCHOR = task_1(uuid_starting=["anchor structure"],
                            mpi_helper_size=1)  # only run on the rank==0 tranch.
            self.uuid_cache = ANCHOR.uuid_cache  # reformed uuid list after n_refls filter
            ANCHOR.run()

            keyval = [("experiment", []), ("coset", [])]
            raw = OrderedDict(keyval)
            print("Anchor", "experiments:",
                  len(sampling_experiments_for_cosym))

            # Entry 0 of the anchor run is the reference model itself.
            anchor_op = ANCHOR.cb_op_to_minimum[0].inverse() * \
                sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[0]) * \
                ANCHOR.cb_op_to_minimum[0]
            anchor_coset = None
            for p_no, partition in enumerate(partitions):
                partition_ops = [
                    change_of_basis_op(ip).as_hkl() for ip in partition
                ]
                if anchor_op.as_hkl() in partition_ops:
                    anchor_coset = p_no
                    break
            assert anchor_coset is not None
            print("The consensus for the anchor is", anchor_op.as_hkl(),
                  " anchor coset", anchor_coset)

            raw["experiment"].append("anchor structure")
            raw["coset"].append(anchor_coset)
            for sidx in range(1, len(self.uuid_cache)):
                raw["experiment"].append(self.uuid_cache[sidx])

                sidx_plus = sidx

                minimum_to_input = ANCHOR.cb_op_to_minimum[sidx_plus].inverse()
                reindex_op = minimum_to_input * \
                    sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[sidx_plus]) * \
                    ANCHOR.cb_op_to_minimum[sidx_plus]
                this_reindex_op = reindex_op.as_hkl()
                this_coset = None
                for p_no, partition in enumerate(partitions):
                    partition_ops = [
                        change_of_basis_op(ip).as_hkl() for ip in partition
                    ]
                    if this_reindex_op in partition_ops:
                        this_coset = p_no
                        break
                assert this_coset is not None
                raw["coset"].append(this_coset)

            from pandas import DataFrame as df
            anchor_data = df(raw)
            REC.reconcile_with_anchor(results, anchor_data, anchor_op)
            # no need for return value; results dataframe is modified in place

        if self.params.modify.cosym.dataframe:
            import os
            results.to_pickle(
                path=os.path.join(self.params.output.output_dir,
                                  self.params.modify.cosym.dataframe))
        transmitted = results
    else:
        transmitted = None
    self.mpi_helper.comm.barrier()
    transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
    # "transmitted" holds the global coset assignments

    # subselect expt and refl on the successful coset assignments
    # output: experiments-->result_experiments_for_cosym;
    #         reflections-->reflections (rebound to the selected rows)
    result_experiments_for_cosym = ExperimentList()
    good_refls = flex.bool(len(reflections), False)
    good_expt_id = list(transmitted["experiment"])
    good_coset = list(transmitted["coset"])
    # One dictionary lookup per experiment instead of scanning the list;
    # if an identifier appears more than once its first assignment is used.
    coset_by_expt_id = {}
    for expt_id, coset in zip(good_expt_id, good_coset):
        coset_by_expt_id.setdefault(expt_id, coset)

    for iexpt in range(len(experiments)):
        iexpt_id = experiments[iexpt].identifier
        if iexpt_id not in coset_by_expt_id:
            continue
        this_coset = coset_by_expt_id[iexpt_id]
        this_cb_op = change_of_basis_op(CO.partitions[this_coset][0])
        accepted_expt = experiments[iexpt]
        if this_coset > 0:
            # need to use wrapper because of cctbx/dxtbx#5
            accepted_expt.crystal = MosaicCrystalSauter2014(
                accepted_expt.crystal.change_basis(this_cb_op))
        result_experiments_for_cosym.append(accepted_expt)
        good_refls |= reflections["exp_id"] == iexpt_id
    reflections = reflections.select(good_refls)

    self.mpi_helper.comm.barrier()

    # still have to reindex the reflection table, but try to do it efficiently
    from xfel.merging.application.modify.reindex_cosym import reindex_refl_by_coset
    reindex_refl_by_coset(
        refl=reflections,
        data=transmitted,
        symms=[
            E.crystal.get_crystal_symmetry()
            for E in result_experiments_for_cosym
        ],
        uuids=[E.identifier for E in result_experiments_for_cosym],
        co=CO,
        anomalous_flag=self.params.merging.merge_anomalous == False,
        verbose=False)
    # this should have re-indexed the refls in place, no need for return value

    self.mpi_helper.comm.barrier()
    # Note: this handles the simple case of lattice ambiguity (P63 in P/mmm
    # lattice group); in this use case we assume all inputs and outputs are
    # in P63.  More complex use cases would have to reset the space group in
    # the crystal, and recalculate the ASU "miller_indicies" in the
    # reflections table.
    self.logger.log_step_time("COSYM", True)
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(result_experiments_for_cosym, reflections)
    return result_experiments_for_cosym, reflections
def run(self, all_experiments, all_reflections):
    """Load this rank's share of the input files using MPI.

    Rank 0 builds the list of experiment/reflection file pairs, splits it
    into one list per rank and broadcasts the result. Every rank then reads
    its own list, gives each experiment an identifier if it has none, and
    appends the experiment and its reflections to the running totals. Each
    appended experiment gets a fresh integer 'id' (one past the largest id
    already present in ``all_reflections``) and its identifier is written
    to the 'exp_id' column.

    Args:
        all_experiments: experiment list to append to, or None.
        all_reflections: reflection table to append to, or None. Either
            both arguments are None (fresh containers are created) or
            neither is.

    Returns:
        Tuple ``(all_experiments, all_reflections)``; the reflection table
        is the result of ``prune_reflection_table_keys`` with the
        ``keys_to_keep`` listed at the end of this method.

    Raises:
        AssertionError: if exactly one argument is None, if a reflection
            file's number of distinct ids differs from the number of
            experiments in the paired file, if an id is negative, or if an
            experiment has no reflections.
    """
    from dxtbx.model.experiment_list import ExperimentList
    from dials.array_family import flex

    # Both must be None or both not None
    test = [all_experiments is None, all_reflections is None].count(True)
    assert test in [0, 2]
    if test == 2:
        all_experiments = ExperimentList()
        all_reflections = flex.reflection_table()
        starting_expts_count = starting_refls_count = 0
    else:
        starting_expts_count = len(all_experiments)
        starting_refls_count = len(all_reflections)
    self.logger.log(
        "Initial number of experiments: %d; Initial number of reflections: %d"
        % (starting_expts_count, starting_refls_count))

    # Generate and send a list of file paths to each worker
    if self.mpi_helper.rank == 0:
        file_list = self.get_list()
        self.logger.log(
            "Built an input list of %d json/pickle file pairs" %
            (len(file_list)))
        self.params.input.path = None  # Rank 0 has already parsed the input parameters
        per_rank_file_list = file_load_calculator(
            self.params, file_list, self.logger).calculate_file_load(
                available_rank_count=self.mpi_helper.size)
        self.logger.log(
            'Transmitting a list of %d lists of json/pickle file pairs' %
            (len(per_rank_file_list)))
        transmitted = per_rank_file_list
    else:
        transmitted = None

    self.logger.log_step_time("BROADCAST_FILE_LIST")
    transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
    # There may be fewer per-rank lists than ranks; the extra ranks get None.
    if self.mpi_helper.rank < len(transmitted):
        new_file_list = transmitted[self.mpi_helper.rank]
    else:
        new_file_list = None
    self.logger.log_step_time("BROADCAST_FILE_LIST", True)

    # Load the data
    self.logger.log_step_time("LOAD")
    if new_file_list is not None:
        self.logger.log("Received a list of %d json/pickle file pairs" %
                        len(new_file_list))

        # Next rank-local 'id' to hand out. It is initialised lazily, on the
        # first experiment, from the largest id already in all_reflections,
        # and then simply incremented. Every appended slice is non-empty and
        # carries exactly the id just handed out, so this yields the same
        # values as rescanning the whole 'id' column for every experiment,
        # without the quadratic cost.
        next_id = None

        for experiments_filename, reflections_filename in new_file_list:
            experiments = ExperimentListFactory.from_json_file(
                experiments_filename, check_format=False)
            reflections = flex.reflection_table.from_file(
                reflections_filename)
            # NOTE: slicing is used below because selection no longer
            # works; sorting by 'id' makes each experiment's reflections
            # one contiguous run.
            reflections.sort("id")
            unique_refl_ids = set(reflections['id'])
            assert len(unique_refl_ids) == len(
                experiments
            ), "refl table and experiment list should contain data on same experiment "  # TODO: decide if this is true
            assert min(
                reflections["id"]
            ) >= 0, "No more -1 in the id column, ideally it should be the numerical index of experiment, but beware that this is not enforced anywhere in the upstream code base"

            # Keep the as-read intensities under separate keys.
            # NOTE(review): the "* 1" presumably forces a new column rather
            # than a reference to the original one -- confirm.
            if 'intensity.sum.value' in reflections:
                reflections['intensity.sum.value.unmodified'] = reflections[
                    'intensity.sum.value'] * 1
            if 'intensity.sum.variance' in reflections:
                reflections[
                    'intensity.sum.variance.unmodified'] = reflections[
                        'intensity.sum.variance'] * 1

            for experiment_id, experiment in enumerate(experiments):
                if experiment.identifier is None or len(
                        experiment.identifier) == 0:
                    experiment.identifier = create_experiment_identifier(
                        experiment, experiments_filename, experiment_id)

                all_experiments.append(experiment)

                # Select reflections of the current experiment.
                # FIXME the selection was broken for me, it raised
                # RuntimeError: boost::bad_get: failed value get using boost::get
                #refls = reflections.select(reflections['id'] == experiment_id)
                # NOTE: this is a hack due to the broken experiment_id
                # selection above; it relies on the sort by 'id' so that
                # first..last matching positions form one contiguous slice.
                exp_id_pos = np.where(
                    reflections['id'] == experiment_id)[0]
                assert exp_id_pos.size, "no refls in this experiment"  # NOTE: maybe we can relax this assertion ?
                refls = reflections[exp_id_pos[0]:exp_id_pos[-1] + 1]

                # FIXME: how will this work if reading in multiple composite
                # mode experiment jsons?
                # Reflection experiment 'id' is supposed to be unique within
                # this rank; 'exp_id' (i.e. experiment identifier) is
                # supposed to be unique globally.
                refls['exp_id'] = flex.std_string(len(refls),
                                                  experiment.identifier)

                if next_id is None:
                    if len(all_reflections) > 0:
                        next_id = max(all_reflections['id']) + 1
                    else:
                        next_id = 0
                new_id = next_id
                next_id += 1

                # FIXME: it is hard to interpret a function call that
                # returns a changeable property. The mapping returned here
                # is edited in place so that it holds only new_id.
                eid = refls.experiment_identifiers()
                for k in eid.keys():
                    del eid[k]
                eid[new_id] = experiment.identifier
                refls['id'] = flex.int(len(refls), new_id)
                all_reflections.extend(refls)
    else:
        self.logger.log("Received a list of 0 json/pickle file pairs")
    self.logger.log_step_time("LOAD", True)

    self.logger.log('Read %d experiments consisting of %d reflections' %
                    (len(all_experiments) - starting_expts_count,
                     len(all_reflections) - starting_refls_count))
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    from xfel.merging.application.reflection_table_utils import reflection_table_utils
    all_reflections = reflection_table_utils.prune_reflection_table_keys(
        reflections=all_reflections,
        keys_to_keep=[
            'intensity.sum.value', 'intensity.sum.variance', 'miller_index',
            'miller_index_asymmetric', 'exp_id', 's1',
            'intensity.sum.value.unmodified',
            'intensity.sum.variance.unmodified'
        ])
    self.logger.log("Pruned reflection table")
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    # Do we have any data?
    from xfel.merging.application.utils.data_counter import data_counter
    data_counter(self.params).count(all_experiments, all_reflections)

    return all_experiments, all_reflections