# Module-level imports assumed by the snippets in this section (cctbx/dials
# conventions; the individual source files import a subset of these). The
# dials flex extends scitbx flex and adds reflection_table. Names such as
# slot/Slot, simple_weighted_correlation, rs_refinery, rs_parameterization,
# eta_deff_refinery, binned_data, image_kapton_correction and format_value
# are defined elsewhere in the originating modules.
import math

from cctbx import miller, uctbx
from cctbx.crystal import symmetry
from cctbx.crystal_orientation import crystal_orientation, basis_type
from dials.array_family import flex
from dxtbx.model.experiment_list import ExperimentList
from libtbx.utils import Sorry
from scitbx import matrix

try:
    from matplotlib import pyplot
except ImportError:
    pyplot = None  # plotting is optional; guarded below with raise Sorry


def chosen_weights(observation_set, params):
    data = observation_set.data()
    sigmas = observation_set.sigmas()
    return {
        "unit": flex.double(len(data), 1.),
        "variance": 1. / (sigmas * sigmas),
        "gentle": flex.pow(flex.sqrt(flex.abs(data)) / sigmas, 2),
        "extreme": flex.pow(data / sigmas, 2)
    }[params.postrefinement.target_weighting]
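
# --- Illustrative usage sketch (not part of the original source) ---
# chosen_weights() maps a PHIL string to one of four weighting schemes:
# unit weights, inverse-variance, a "gentle" |I|/sigma^2 compromise, and an
# "extreme" (I/sigma)^2 scheme. The stub classes below are hypothetical
# stand-ins for the miller-array-like observation set and the PHIL params.
class _StubObservations(object):
    def __init__(self, data, sigmas):
        self._data, self._sigmas = data, sigmas
    def data(self):
        return self._data
    def sigmas(self):
        return self._sigmas


def _demo_chosen_weights():
    from types import SimpleNamespace
    obs = _StubObservations(flex.double([120., -5., 300.]),
                            flex.double([11., 4., 17.]))
    params = SimpleNamespace(
        postrefinement=SimpleNamespace(target_weighting="variance"))
    return chosen_weights(obs, params)  # -> 1/sigma^2 weights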
def __init__(self, d_spacings, target_n_per_bin=25, max_slots=40, min_slots=20):
    d_spacings = flex.double(list(set(d_spacings)))
    d_spacings_sorted = flex.sorted(d_spacings, reverse=True)
    d_star_cubed_sorted = flex.pow(1 / d_spacings_sorted, 3)
    # Choose the bin volume such that the lowest-resolution shell contains
    # 5% of the spots or target_n_per_bin, whichever is greater, capped at
    # 25% of the spots.
    low_res_count = int(
        math.ceil(
            min(
                max(target_n_per_bin, 0.05 * len(d_spacings)),
                0.25 * len(d_spacings),
            )))
    bin_step = d_star_cubed_sorted[low_res_count] - d_star_cubed_sorted[0]
    assert bin_step > 0
    n_slots = int(
        math.ceil(
            (d_star_cubed_sorted[-1] - d_star_cubed_sorted[0]) / bin_step))
    if max_slots is not None:
        n_slots = min(n_slots, max_slots)
    if min_slots is not None:
        n_slots = max(n_slots, min_slots)
    bin_step = (d_star_cubed_sorted[-1] - d_star_cubed_sorted[0]) / n_slots
    self.bins = []
    ds3_max = d_star_cubed_sorted[0]
    for i in range(n_slots):
        ds3_min = d_star_cubed_sorted[0] + (i + 1) * bin_step
        self.bins.append(Slot(1 / ds3_min**(1 / 3), 1 / ds3_max**(1 / 3)))
        ds3_max = ds3_min
def __init__(self, d_spacings, target_n_per_bin=25, max_slots=40, min_slots=20):
    d_spacings_sorted = flex.sorted(d_spacings, reverse=True)
    d_star_cubed_sorted = flex.pow(1 / d_spacings_sorted, 3)
    # choose bin volume such that lowest resolution shell contains 5% of the
    # spots, or 25, whichever is greater
    low_res_count = int(
        math.ceil(min(max(target_n_per_bin, 0.05 * len(d_spacings)),
                      0.25 * len(d_spacings))))
    bin_step = d_star_cubed_sorted[low_res_count] - d_star_cubed_sorted[0]
    n_slots = int(
        math.ceil(
            (d_star_cubed_sorted[-1] - d_star_cubed_sorted[0]) / bin_step))
    #n_slots = len(d_spacings_sorted)//target_n_per_bin
    if max_slots is not None:
        n_slots = min(n_slots, max_slots)
    if min_slots is not None:
        n_slots = max(n_slots, min_slots)
    bin_step = (d_star_cubed_sorted[-1] - d_star_cubed_sorted[0]) / n_slots
    self.bins = []
    ds3_max = d_star_cubed_sorted[0]
    for i in range(n_slots):
        ds3_min = d_star_cubed_sorted[0] + (i + 1) * bin_step
        self.bins.append(slot(1 / ds3_min**(1 / 3), 1 / ds3_max**(1 / 3)))
        ds3_max = ds3_min
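
# --- Illustrative sketch (not part of the original source) ---
# The two __init__ variants above bin reflections uniformly in d*^3, i.e. in
# equal reciprocal-space volume, so each shell holds a comparable number of
# roughly uniformly distributed spots. The standalone reimplementation below
# mirrors that arithmetic on synthetic d-spacings; _DemoSlot is a
# hypothetical stand-in for the slot/Slot container the binners append to.
import collections
import random

_DemoSlot = collections.namedtuple("_DemoSlot", ["d_min", "d_max"])


def _demo_d_star_cubed_binning(n_spots=500, d_lo=1.5, d_hi=40.0, n_slots=10):
    d_spacings = sorted((random.uniform(d_lo, d_hi) for _ in range(n_spots)),
                        reverse=True)
    ds3_lo = 1 / d_spacings[0]**3   # smallest d*^3 (lowest resolution)
    ds3_hi = 1 / d_spacings[-1]**3  # largest d*^3 (highest resolution)
    bin_step = (ds3_hi - ds3_lo) / n_slots
    bins = []
    ds3_max = ds3_lo
    for i in range(n_slots):
        ds3_min = ds3_lo + (i + 1) * bin_step
        bins.append(_DemoSlot(1 / ds3_min**(1 / 3), 1 / ds3_max**(1 / 3)))
        ds3_max = ds3_min
    for b in bins:
        # edges are shared between adjacent bins; ties are negligible for
        # random floats
        count = sum(1 for d in d_spacings if b.d_min <= d <= b.d_max)
        print("%.2f - %.2f A: %d spots" % (b.d_max, b.d_min, count))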
def result_for_cxi_merge(self, file_name):
    values = self.get_parameter_values()
    self.rs2_parameter_range_assertions(values)
    scaler = self.nave1_refinery.scaler_callable(self.get_parameter_values())

    partiality_array = self.refinery.get_partiality_array(values)
    p_scaler = flex.pow(
        partiality_array,
        0.5 * self.params.postrefinement.merge_partiality_exponent)

    fat_selection = (
        self.nave1_refinery.lorentz_callable(self.get_parameter_values()) >
        self.params.postrefinement.rs_hybrid.partiality_threshold
    )  # was 0.2 for rs2
    fat_count = fat_selection.count(True)
    scaler_s = scaler.select(fat_selection)
    p_scaler_s = p_scaler.select(fat_selection)

    # avoid empty database INSERT, if insufficient centrally-located Bragg spots:
    # in samosa, handle this at a higher level, but handle it somehow.
    if fat_count < 3:
        raise ValueError("< 3 near-fulls after refinement")
    print("On total %5d the fat selection is %5d" % (
        len(self.observations_pair1_selected.indices()), fat_count),
        file=self.out)

    observations_original_index = \
        self.observations_original_index_pair1_selected.select(fat_selection)

    observations = self.observations_pair1_selected.customized_copy(
        indices=self.observations_pair1_selected.indices().select(fat_selection),
        data=(self.observations_pair1_selected.data().select(fat_selection) /
              scaler_s),
        sigmas=(self.observations_pair1_selected.sigmas().select(fat_selection) /
                (scaler_s * p_scaler_s)))

    matches = miller.match_multi_indices(
        miller_indices_unique=self.miller_set.indices(),
        miller_indices=observations.indices())

    I_weight = flex.double(len(observations.sigmas()), 1.)
    I_reference = flex.double(
        [self.i_model.data()[pair[0]] for pair in matches.pairs()])
    I_invalid = flex.bool(
        [self.i_model.sigmas()[pair[0]] < 0. for pair in matches.pairs()])
    I_weight.set_selected(I_invalid, 0.)
    SWC = simple_weighted_correlation(I_weight, I_reference,
                                      observations.data())
    print("CORR: NEW correlation is", SWC.corr, file=self.out)
    print("ASTAR_FILE", file_name,
          tuple(self.nave1_refinery.get_eff_Astar(values)), file=self.out)
    self.final_corr = SWC.corr

    # another range assertion
    assert self.final_corr > 0.1, \
        "correlation coefficient out of range (<= 0.1) after LevMar refinement"
    # XXX Specific to the hybrid_rs method, and likely these limits are
    # problem-specific (especially G-max), so look for another approach or
    # expose the limits as phil parameters.
    assert values.G < 0.5, \
        "G-scale value out of range ( > 0.5 XXX may be too strict ) after LevMar refinement"

    return observations_original_index, observations, matches
def result_for_cxi_merge(self, file_name):
    values = self.get_parameter_values()
    self.rs2_parameter_range_assertions(values)
    scaler = self.refinery.scaler_callable(
        self.parameterization_class(self.MINI.x))

    partiality_array = self.refinery.get_partiality_array(values)
    p_scaler = flex.pow(
        partiality_array,
        0.5 * self.params.postrefinement.merge_partiality_exponent)

    fat_selection = (partiality_array > 0.2)
    fat_count = fat_selection.count(True)
    scaler_s = scaler.select(fat_selection)
    p_scaler_s = p_scaler.select(fat_selection)

    # avoid empty database INSERT, if insufficient centrally-located Bragg spots:
    # in samosa, handle this at a higher level, but handle it somehow.
    if fat_count < 3:
        raise ValueError("< 3 near-fulls after refinement")
    print("On total %5d the fat selection is %5d" % (
        len(self.observations_pair1_selected.indices()), fat_count),
        file=self.out)

    observations_original_index = \
        self.observations_original_index_pair1_selected.select(fat_selection)

    observations = self.observations_pair1_selected.customized_copy(
        indices=self.observations_pair1_selected.indices().select(fat_selection),
        data=(self.observations_pair1_selected.data().select(fat_selection) /
              scaler_s),
        sigmas=(self.observations_pair1_selected.sigmas().select(fat_selection) /
                (scaler_s * p_scaler_s)))

    matches = miller.match_multi_indices(
        miller_indices_unique=self.miller_set.indices(),
        miller_indices=observations.indices())

    I_weight = flex.double(len(observations.sigmas()), 1.)
    I_reference = flex.double(
        [self.i_model.data()[pair[0]] for pair in matches.pairs()])
    I_invalid = flex.bool(
        [self.i_model.sigmas()[pair[0]] < 0. for pair in matches.pairs()])
    I_weight.set_selected(I_invalid, 0.)
    SWC = simple_weighted_correlation(I_weight, I_reference,
                                      observations.data())
    print("CORR: NEW correlation is", SWC.corr, file=self.out)
    print("ASTAR_FILE", file_name,
          tuple(self.refinery.get_eff_Astar(values)), file=self.out)
    self.final_corr = SWC.corr
    self.refined_mini = self.MINI

    # another range assertion
    assert self.final_corr > 0.1, \
        "correlation coefficient out of range (<= 0.1) after rs2 refinement"

    return observations_original_index, observations, matches
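
# --- Illustrative sketch (not part of the original source) ---
# The merge result above keeps only "fat" (near-full) reflections and then
# rescales: I' = I / scale and sigma' = sigma / (scale * partiality^x), so
# low-partiality spots that survive the cutoff carry inflated sigmas. A
# minimal standalone rendering of that arithmetic with flex arrays
# (illustrative scale and exponent values):
def _demo_partiality_rescale():
    intensities = flex.double([100., 250., 40., 500.])
    sigmas = flex.double([10., 15., 8., 20.])
    partiality = flex.double([0.9, 0.5, 0.1, 0.7])
    scale = 1.2                       # overall G-scale from refinement
    merge_partiality_exponent = 1.0   # assumed PHIL exponent
    p_scaler = flex.pow(partiality, 0.5 * merge_partiality_exponent)
    fat_selection = partiality > 0.2  # the rs2 threshold used above
    data = intensities.select(fat_selection) / scale
    new_sigmas = sigmas.select(fat_selection) / (
        scale * p_scaler.select(fat_selection))
    return data, new_sigmas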
def correct(refl_sele, smart_sigmas=True):
    kapton_correction = image_kapton_correction(
        panel_size_px=panel_size_px,
        pixel_size_mm=pixel_size_mm,
        detector_dist_mm=detector_dist_mm,
        wavelength_ang=wavelength_ang,
        reflections_sele=refl_sele,
        params=self.params,
        expt=expt,
        refl=refl,
        smart_sigmas=smart_sigmas,
        logger=self.logger,
    )

    k_corr, k_sigmas = kapton_correction()
    refl_sele["kapton_absorption_correction"] = k_corr
    if smart_sigmas:
        refl_sele["kapton_absorption_correction_sigmas"] = k_sigmas
        # apply corrections and propagate error
        # term1 = (sig(C)/C)^2
        # term2 = (sig(Imeas)/Imeas)^2
        # I' = C*I
        # sig^2(I') = (I')^2*(term1 + term2)
        integrated_data = refl_sele["intensity.sum.value"]
        integrated_variance = refl_sele["intensity.sum.variance"]
        integrated_sigma = flex.sqrt(integrated_variance)
        term1 = flex.pow(k_sigmas / k_corr, 2)
        term2 = flex.pow(integrated_sigma / integrated_data, 2)
        integrated_data *= k_corr
        integrated_variance = flex.pow(integrated_data, 2) * (term1 + term2)
        refl_sele["intensity.sum.value"] = integrated_data
        refl_sele["intensity.sum.variance"] = integrated_variance
        # order is purposeful: the two lines above require that
        # integrated_data has already been corrected!
    else:
        refl_sele["intensity.sum.value"] *= k_corr
        refl_sele["intensity.sum.variance"] *= flex.pow2(k_corr)
    return refl_sele
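
# --- Illustrative sketch (not part of the original source) ---
# Worked numerical check of the error propagation used above: for I' = C*I,
# the relative variances add, sig^2(I') = I'^2 * ((sig_C/C)^2 + (sig_I/I)^2).
def _demo_kapton_error_propagation():
    I = flex.double([1000., 2000.])
    var_I = flex.double([100., 400.])
    C = flex.double([1.10, 1.05])      # absorption corrections
    sig_C = flex.double([0.02, 0.01])  # their uncertainties
    term1 = flex.pow(sig_C / C, 2)
    term2 = flex.pow(flex.sqrt(var_I) / I, 2)
    I_corr = I * C
    var_corr = flex.pow(I_corr, 2) * (term1 + term2)
    # e.g. first element: 1100^2 * ((0.02/1.10)^2 + (10/1000)^2) ~ 521
    return I_corr, var_corr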
def result_for_cxi_merge(self):
    values = self.get_parameter_values()
    self.rs2_parameter_range_assertions(values)
    scaler = self.refinery.scaler_callable(
        self.parameterization_class(self.MINI.x))

    partiality_array = self.refinery.get_partiality_array(values)
    p_scaler = flex.pow(
        partiality_array,
        0.5 * self.params.postrefinement.merge_partiality_exponent)

    fat_selection = (partiality_array > 0.2)
    fat_count = fat_selection.count(True)
    scaler_s = scaler.select(fat_selection)
    p_scaler_s = p_scaler.select(fat_selection)

    # reject an experiment with insufficient number of near-full reflections
    if fat_count < 3:
        if self.params.output.log_level == 0:
            self.logger.log(
                "Rejected experiment, because: On total %5d the fat selection is %5d" %
                (len(self.observations_pair1_selected.indices()), fat_count))
        raise ValueError("< 3 near-fulls after refinement")

    if self.params.output.log_level == 0:
        self.logger.log("On total %5d the fat selection is %5d" %
                        (len(self.observations_pair1_selected.indices()),
                         fat_count))

    observations_original_index = \
        self.observations_original_index_pair1_selected.select(fat_selection)

    observations = self.observations_pair1_selected.customized_copy(
        indices=self.observations_pair1_selected.indices().select(fat_selection),
        data=(self.observations_pair1_selected.data().select(fat_selection) /
              scaler_s),
        sigmas=(self.observations_pair1_selected.sigmas().select(fat_selection) /
                (scaler_s * p_scaler_s)))

    matches = miller.match_multi_indices(
        miller_indices_unique=self.params.scaling.miller_set.indices(),
        miller_indices=observations.indices())

    I_weight = flex.double(len(observations.sigmas()), 1.)
    I_reference = flex.double(
        [self.params.scaling.i_model.data()[pair[0]]
         for pair in matches.pairs()])
    I_invalid = flex.bool(
        [self.params.scaling.i_model.sigmas()[pair[0]] < 0.
         for pair in matches.pairs()])
    I_weight.set_selected(I_invalid, 0.)
    SWC = simple_weighted_correlation(I_weight, I_reference,
                                      observations.data())

    if self.params.output.log_level == 0:
        self.logger.log("CORR: NEW correlation is: %f" % SWC.corr)
        self.logger.log("ASTAR: ")
        self.logger.log(tuple(self.refinery.get_eff_Astar(values)))

    self.final_corr = SWC.corr
    self.refined_mini = self.MINI

    # another range assertion
    assert self.final_corr > 0.1, \
        "correlation coefficient out of range (<= 0.1) after rs2 refinement"

    return observations_original_index, observations, matches
def result_for_cxi_merge(self, file_name):
    values = self.get_parameter_values()
    self.rs2_parameter_range_assertions(values)
    scaler = self.nave1_refinery.scaler_callable(self.get_parameter_values())

    partiality_array = self.refinery.get_partiality_array(values)
    p_scaler = flex.pow(
        partiality_array,
        0.5 * self.params.postrefinement.merge_partiality_exponent)

    fat_selection = (
        self.nave1_refinery.lorentz_callable(self.get_parameter_values()) >
        self.params.postrefinement.rs_hybrid.partiality_threshold
    )  # was 0.2 for rs2
    fat_count = fat_selection.count(True)
    scaler_s = scaler.select(fat_selection)
    p_scaler_s = p_scaler.select(fat_selection)

    # avoid empty database INSERT, if insufficient centrally-located Bragg spots:
    # in samosa, handle this at a higher level, but handle it somehow.
    if fat_count < 3:
        raise ValueError("< 3 near-fulls after refinement")
    print("On total %5d the fat selection is %5d" % (
        len(self.observations_pair1_selected.indices()), fat_count),
        file=self.out)

    observations_original_index = \
        self.observations_original_index_pair1_selected.select(fat_selection)

    observations = self.observations_pair1_selected.customized_copy(
        indices=self.observations_pair1_selected.indices().select(fat_selection),
        data=(self.observations_pair1_selected.data().select(fat_selection) /
              scaler_s),
        sigmas=(self.observations_pair1_selected.sigmas().select(fat_selection) /
                (scaler_s * p_scaler_s)))

    matches = miller.match_multi_indices(
        miller_indices_unique=self.miller_set.indices(),
        miller_indices=observations.indices())

    I_weight = flex.double(len(observations.sigmas()), 1.)
    I_reference = flex.double(
        [self.i_model.data()[pair[0]] for pair in matches.pairs()])
    SWC = simple_weighted_correlation(I_weight, I_reference,
                                      observations.data())
    print("CORR: NEW correlation is", SWC.corr, file=self.out)
    self.final_corr = SWC.corr

    # another range assertion
    assert self.final_corr > 0.1, \
        "correlation coefficient out of range (<= 0.1) after LevMar refinement"
    # XXX Specific to the hybrid_rs method, and likely these limits are
    # problem-specific (especially G-max), so look for another approach or
    # expose the limits as phil parameters.
    assert values.G < 0.5, \
        "G-scale value out of range ( > 0.5 XXX may be too strict ) after LevMar refinement"

    return observations_original_index, observations, matches
def run(self, experiments, reflections):
    self.logger.log_step_time("POSTREFINEMENT")
    if not self.params.postrefinement.enable:
        self.logger.log("Postrefinement was not done")
        if self.mpi_helper.rank == 0:
            self.logger.main_log("Postrefinement was not done")
        return experiments, reflections

    target_symm = symmetry(
        unit_cell=self.params.scaling.unit_cell,
        space_group_info=self.params.scaling.space_group)
    i_model = self.params.scaling.i_model
    miller_set = self.params.scaling.miller_set

    # Ensure that match_multi_indices() will return identical results when a
    # frame's observations are matched against the pre-generated Miller set,
    # self.miller_set, and the reference data set, self.i_model. The
    # implication is that the same match can be used to map Miller indices
    # to array indices for intensity accumulation, and for determination of
    # the correlation coefficient in the presence of a scaling reference.
    assert len(i_model.indices()) == len(miller_set.indices())
    assert (i_model.indices() == miller_set.indices()).count(False) == 0

    new_experiments = ExperimentList()
    new_reflections = flex.reflection_table()

    experiments_rejected_by_reason = {}  # reason:how_many_rejected

    for experiment in experiments:
        exp_reflections = reflections.select(
            reflections['exp_id'] == experiment.identifier)

        # Build a miller array for the experiment reflections with original
        # miller indices.
        exp_miller_indices_original = miller.set(
            target_symm, exp_reflections['miller_index'], True)
        observations_original_index = miller.array(
            exp_miller_indices_original,
            exp_reflections['intensity.sum.value'],
            flex.double(flex.sqrt(exp_reflections['intensity.sum.variance'])))

        assert exp_reflections.size() == exp_miller_indices_original.size()
        assert observations_original_index.size() == \
            exp_miller_indices_original.size()

        # Build a miller array for the experiment reflections with asu
        # miller indices.
        exp_miller_indices_asu = miller.set(
            target_symm, exp_reflections['miller_index_asymmetric'], True)
        observations = miller.array(
            exp_miller_indices_asu,
            exp_reflections['intensity.sum.value'],
            flex.double(flex.sqrt(exp_reflections['intensity.sum.variance'])))

        matches = miller.match_multi_indices(
            miller_indices_unique=miller_set.indices(),
            miller_indices=observations.indices())
        pair1 = flex.int(
            [pair[1] for pair in matches.pairs()])  # refers to the observations
        pair0 = flex.int(
            [pair[0] for pair in matches.pairs()])  # refers to the model

        assert exp_reflections.size() == exp_miller_indices_original.size()
        assert observations_original_index.size() == \
            exp_miller_indices_original.size()

        # narrow things down to the set that matches, only
        observations_pair1_selected = observations.customized_copy(
            indices=flex.miller_index(
                [observations.indices()[p] for p in pair1]),
            data=flex.double([observations.data()[p] for p in pair1]),
            sigmas=flex.double([observations.sigmas()[p] for p in pair1]))
        observations_original_index_pair1_selected = \
            observations_original_index.customized_copy(
                indices=flex.miller_index(
                    [observations_original_index.indices()[p] for p in pair1]),
                data=flex.double(
                    [observations_original_index.data()[p] for p in pair1]),
                sigmas=flex.double(
                    [observations_original_index.sigmas()[p] for p in pair1]))

        I_observed = observations_pair1_selected.data()
        MILLER = observations_original_index_pair1_selected.indices()

        ORI = crystal_orientation(experiment.crystal.get_A(),
                                  basis_type.reciprocal)
        Astar = matrix.sqr(ORI.reciprocal_matrix())
        Astar_from_experiment = matrix.sqr(experiment.crystal.get_A())
        assert Astar == Astar_from_experiment

        WAVE = experiment.beam.get_wavelength()
        BEAM = matrix.col((0.0, 0.0, -1. / WAVE))
        BFACTOR = 0.
        MOSAICITY_DEG = experiment.crystal.get_half_mosaicity_deg()
        DOMAIN_SIZE_A = experiment.crystal.get_domain_size_ang()

        # calculation of correlation here
        I_reference = flex.double(
            [i_model.data()[pair[0]] for pair in matches.pairs()])
        I_invalid = flex.bool(
            [i_model.sigmas()[pair[0]] < 0. for pair in matches.pairs()])
        use_weights = False  # New facility for getting variance-weighted correlation

        if use_weights:
            # variance weighting
            I_weight = flex.double([
                1. / (observations_pair1_selected.sigmas()[pair[1]])**2
                for pair in matches.pairs()
            ])
        else:
            I_weight = flex.double(
                len(observations_pair1_selected.sigmas()), 1.)
        I_weight.set_selected(I_invalid, 0.)

        """Explanation of 'include_negatives' semantics as originally
        implemented in cxi.merge postrefinement:

        include_negatives = True
        + and - reflections both used for Rh distribution for initial
          estimate of RS parameter
        + and - reflections both used for calc/obs correlation slope for
          initial estimate of G parameter
        + and - reflections both passed to the refinery and used in the
          target function (makes sense if you look at it from a certain
          point of view)

        include_negatives = False
        + and - reflections both used for Rh distribution for initial
          estimate of RS parameter
        + reflections only used for calc/obs correlation slope for initial
          estimate of G parameter
        + and - reflections both passed to the refinery and used in the
          target function (makes sense if you look at it from a certain
          point of view)
        """
        # RB: By design, for MPI-Merge "include negatives" is implicitly True.
        SWC = simple_weighted_correlation(I_weight, I_reference, I_observed)
        if self.params.output.log_level == 0:
            self.logger.log("Old correlation is: %f" % SWC.corr)

        if self.params.postrefinement.algorithm == "rs":
            Rhall = flex.double()
            for mill in MILLER:
                H = matrix.col(mill)
                Xhkl = Astar * H
                Rh = (Xhkl + BEAM).length() - (1. / WAVE)
                Rhall.append(Rh)
            Rs = math.sqrt(flex.mean(Rhall * Rhall))

            RS = 1. / 10000.  # reciprocal effective domain size of 1 micron
            RS = Rs  # try this empirically determined approximate, monochrome, a-mosaic value
            current = flex.double([SWC.slope, BFACTOR, RS, 0., 0.])

            parameterization_class = rs_parameterization
            refinery = rs_refinery(ORI=ORI, MILLER=MILLER, BEAM=BEAM,
                                   WAVE=WAVE, ICALCVEC=I_reference,
                                   IOBSVEC=I_observed)

        elif self.params.postrefinement.algorithm == "eta_deff":
            eta_init = 2. * MOSAICITY_DEG * math.pi / 180.
            D_eff_init = 2. * DOMAIN_SIZE_A
            current = flex.double(
                [SWC.slope, BFACTOR, eta_init, 0., 0., D_eff_init])

            parameterization_class = eta_deff_parameterization
            refinery = eta_deff_refinery(ORI=ORI, MILLER=MILLER, BEAM=BEAM,
                                         WAVE=WAVE, ICALCVEC=I_reference,
                                         IOBSVEC=I_observed)

        func = refinery.fvec_callable(parameterization_class(current))
        functional = flex.sum(func * func)
        if self.params.output.log_level == 0:
            self.logger.log("functional: %f" % functional)

        self.current = current
        self.parameterization_class = parameterization_class
        self.refinery = refinery
        self.observations_pair1_selected = observations_pair1_selected
        self.observations_original_index_pair1_selected = \
            observations_original_index_pair1_selected

        error_detected = False
        try:
            self.run_plain()
            result_observations_original_index, result_observations, \
                result_matches = self.result_for_cxi_merge()
            assert result_observations_original_index.size() == \
                result_observations.size()
            assert result_matches.pairs().size() == \
                result_observations_original_index.size()
        except (AssertionError, ValueError, RuntimeError) as e:
            error_detected = True
            reason = repr(e)
            if not reason:
                reason = "Unknown error"
            if reason not in experiments_rejected_by_reason:
                experiments_rejected_by_reason[reason] = 1
            else:
                experiments_rejected_by_reason[reason] += 1

        if not error_detected:
            new_experiments.append(experiment)
            new_exp_reflections = flex.reflection_table()
            new_exp_reflections['miller_index_asymmetric'] = \
                flex.miller_index(result_observations.indices())
            new_exp_reflections['intensity.sum.value'] = flex.double(
                result_observations.data())
            new_exp_reflections['intensity.sum.variance'] = flex.double(
                flex.pow(result_observations.sigmas(), 2))
            new_exp_reflections['exp_id'] = flex.std_string(
                len(new_exp_reflections), experiment.identifier)
            new_reflections.extend(new_exp_reflections)
        '''
        # debugging
        elif reason.startswith("ValueError"):
            self.logger.log("Rejected b/c of value error exp id: %s; unit cell: %s"%(exp_id, str(experiment.crystal.get_unit_cell())))
        '''

    # report rejected experiments, reflections
    experiments_rejected_by_postrefinement = \
        len(experiments) - len(new_experiments)
    reflections_rejected_by_postrefinement = \
        reflections.size() - new_reflections.size()
    self.logger.log("Experiments rejected by post-refinement: %d" %
                    experiments_rejected_by_postrefinement)
    self.logger.log("Reflections rejected by post-refinement: %d" %
                    reflections_rejected_by_postrefinement)

    all_reasons = []
    for reason, count in experiments_rejected_by_reason.items():
        self.logger.log("Experiments rejected due to %s: %d" % (reason, count))
        all_reasons.append(reason)

    comm = self.mpi_helper.comm
    MPI = self.mpi_helper.MPI

    # Collect all rejection reasons from all ranks. Use allreduce to let
    # each rank have all reasons.
    all_reasons = comm.allreduce(all_reasons, MPI.SUM)
    all_reasons = set(all_reasons)

    # Now that each rank has all reasons from all ranks, we can treat the
    # reasons in a uniform way.
    total_experiments_rejected_by_reason = {}
    for reason in all_reasons:
        rejected_experiment_count = 0
        if reason in experiments_rejected_by_reason:
            rejected_experiment_count = experiments_rejected_by_reason[reason]
        total_experiments_rejected_by_reason[reason] = comm.reduce(
            rejected_experiment_count, MPI.SUM, 0)

    total_accepted_experiment_count = comm.reduce(
        len(new_experiments), MPI.SUM, 0)

    # how many reflections have we rejected due to post-refinement?
    rejected_reflections = len(reflections) - len(new_reflections)
    total_rejected_reflections = self.mpi_helper.sum(rejected_reflections)

    if self.mpi_helper.rank == 0:
        for reason, count in total_experiments_rejected_by_reason.items():
            self.logger.main_log(
                "Total experiments rejected due to %s: %d" % (reason, count))
        self.logger.main_log("Total experiments accepted: %d" %
                             total_accepted_experiment_count)
        self.logger.main_log(
            "Total reflections rejected due to post-refinement: %d" %
            total_rejected_reflections)

    self.logger.log_step_time("POSTREFINEMENT", True)

    return new_experiments, new_reflections
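
# --- Illustrative sketch (not part of the original source) ---
# The "rs" branch above seeds the RS parameter from the r.m.s. excitation
# error Rh: for each Miller index h, x = A* h is the reciprocal-lattice
# point, and Rh measures how far x sits from the Ewald sphere of radius
# 1/lambda centered at -beam. A minimal standalone version with a toy A*
# matrix and an illustrative wavelength:
def _demo_excitation_error():
    wave = 1.3  # wavelength in Angstroms (illustrative)
    Astar = matrix.sqr((0.01, 0, 0,
                        0, 0.01, 0,
                        0, 0, 0.01))  # toy reciprocal matrix
    beam = matrix.col((0.0, 0.0, -1. / wave))
    rh_all = flex.double()
    for hkl in [(1, 2, 3), (-2, 0, 5), (4, -1, 1)]:
        x = Astar * matrix.col(hkl)
        rh = (x + beam).length() - (1. / wave)
        rh_all.append(rh)
    return math.sqrt(flex.mean(rh_all * rh_all))  # r.m.s. Rh -> initial RS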
def estimate_resolution_limit_distl_method1(reflections, plot_filename=None):
    # Implementation of Method 1 (section 2.4.4) of:
    # Z. Zhang, N. K. Sauter, H. van den Bedem, G. Snell and A. M. Deacon
    # J. Appl. Cryst. (2006). 39, 112-119
    # https://doi.org/10.1107/S0021889805040677

    variances = reflections["intensity.sum.variance"]
    sel = variances > 0
    reflections = reflections.select(sel)

    d_star_sq = flex.pow2(reflections["rlp"].norms())
    d_spacings = uctbx.d_star_sq_as_d(d_star_sq)
    d_star_cubed = flex.pow(reflections["rlp"].norms(), 3)

    step = 2
    while len(reflections) / step > 40:
        step += 1

    order = flex.sort_permutation(d_spacings, reverse=True)

    ds3_subset = flex.double()
    d_subset = flex.double()
    for i in range(len(reflections) // step):
        ds3_subset.append(d_star_cubed[order[i * step]])
        d_subset.append(d_spacings[order[i * step]])

    x = flex.double(range(len(ds3_subset)))

    # (i) Usually, Pm is the last point, that is, m = n. But m could be
    # smaller than n if an unusually high number of spots are detected
    # around a certain intermediate resolution. In that case, our search for
    # the image resolution does not go outside the spot 'bump'. This is
    # particularly useful when ice-rings are present.
    slopes = (ds3_subset[1:] - ds3_subset[0]) / (x[1:] - x[0])
    skip_first = 3
    p_m = flex.max_index(slopes[skip_first:]) + 1 + skip_first

    # (ii)
    x1 = matrix.col((0, ds3_subset[0]))
    x2 = matrix.col((p_m, ds3_subset[p_m]))

    gaps = flex.double([0])
    v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

    for i in range(1, p_m):
        x0 = matrix.col((i, ds3_subset[i]))
        r = x1 - x0
        g = abs(v.dot(r))
        gaps.append(g)

    mv = flex.mean_and_variance(gaps)
    s = mv.unweighted_sample_standard_deviation()

    # (iii)
    p_k = flex.max_index(gaps)
    g_k = gaps[p_k]
    p_g = p_k

    for i in range(p_k + 1, len(gaps)):
        g_i = gaps[i]
        if g_i > (g_k - 0.5 * s):
            p_g = i

    d_g = d_subset[p_g]

    noisiness = 0
    n = len(ds3_subset)
    for i in range(n - 1):
        for j in range(i + 1, n - 1):
            if slopes[i] >= slopes[j]:
                noisiness += 1
    noisiness /= (n - 1) * (n - 2) / 2

    if plot_filename is not None:
        from matplotlib import pyplot

        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(range(len(ds3_subset)), ds3_subset)
        ax.set_ylabel("D^-3")
        xlim = pyplot.xlim()
        ylim = pyplot.ylim()
        ax.vlines(p_g, ylim[0], ylim[1], colors="red")
        pyplot.xlim(0, xlim[1])
        pyplot.ylim(0, ylim[1])
        pyplot.savefig(plot_filename)
        pyplot.close()

    return d_g, noisiness
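
# --- Illustrative usage sketch (not part of the original source) ---
# Calling the two-argument Method 1 variant above on a synthetic reflection
# table. Only the two columns the function reads ('rlp' and
# 'intensity.sum.variance') are populated; |rlp| values are drawn so that
# spot density thins out towards high resolution, which is the falloff
# pattern the estimator looks for.
def _demo_method1():
    import random
    refl = flex.reflection_table()
    # |rlp| = 1/d; squaring the uniform draw biases towards low resolution
    norms = sorted(random.uniform(0.25, 1.0) ** 2 for _ in range(400))
    refl['rlp'] = flex.vec3_double([(n, 0., 0.) for n in norms])
    refl['intensity.sum.variance'] = flex.double(len(norms), 1.)
    d_g, noisiness = estimate_resolution_limit_distl_method1(refl)
    print("estimated d_min = %.2f A, noisiness = %.3f" % (d_g, noisiness))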
def run_cc(self):
    uniform, selected_uniform, have_iso_ref = self.load_cc_data()

    include_negatives = True
    if have_iso_ref:
        slope, offset, corr_iso, N_iso = self.correlation(
            selected_uniform[1], selected_uniform[0], include_negatives)
        self.logger.main_log("C.C. iso is %.1f%% on %d indices" %
                             (100 * corr_iso, N_iso))

    slope, offset, corr_int, N_int = self.correlation(
        selected_uniform[2], selected_uniform[3], include_negatives)
    self.logger.main_log("C.C. int is %.1f%% on %d indices" %
                         (100. * corr_int, N_int))

    if have_iso_ref:
        binned_cc_ref, binned_cc_ref_N = self.binned_correlation(
            selected_uniform[1], selected_uniform[0], include_negatives)
        #binned_cc_ref.show(f=output)

        ref_scale = self.scale_factor(
            selected_uniform[1], selected_uniform[0],
            weights=flex.pow(selected_uniform[1].sigmas(), -2),
            use_binning=True)
        #ref_scale.show(f=output)

        ref_riso = self.r1_factor(
            selected_uniform[1], selected_uniform[0],
            scale_factor=ref_scale, use_binning=True)
        #ref_riso.show(f=output)

        ref_scale_all = self.scale_factor(
            selected_uniform[1], selected_uniform[0],
            weights=flex.pow(selected_uniform[1].sigmas(), -2))

        ref_riso_all = self.r1_factor(
            selected_uniform[1], selected_uniform[0],
            scale_factor=ref_scale_all)

    binned_cc_int, binned_cc_int_N = self.binned_correlation(
        #selected_uniform[2], selected_uniform[3], params.include_negatives)
        selected_uniform[2], selected_uniform[3], True)
    #binned_cc_int.show(f=output)

    oe_scale = self.scale_factor(
        selected_uniform[2], selected_uniform[3],
        weights=flex.pow(selected_uniform[2].sigmas(), -2) +
                flex.pow(selected_uniform[3].sigmas(), -2),
        use_binning=True)
    #oe_scale.show(f=output)

    oe_rint = self.r1_factor(
        selected_uniform[2], selected_uniform[3],
        scale_factor=oe_scale, use_binning=True)
    #oe_rint.show(f=output)

    oe_rsplit = self.r_split(
        selected_uniform[2], selected_uniform[3], use_binning=True)

    oe_scale_all = self.scale_factor(
        selected_uniform[2], selected_uniform[3],
        weights=flex.pow(selected_uniform[2].sigmas(), -2) +
                flex.pow(selected_uniform[3].sigmas(), -2))

    oe_rint_all = self.r1_factor(
        selected_uniform[2], selected_uniform[3],
        scale_factor=oe_scale_all)

    oe_rsplit_all = self.r_split(selected_uniform[2], selected_uniform[3])

    if have_iso_ref:
        self.logger.main_log("R factors Riso = %.1f%%, Rint = %.1f%%" %
                             (100. * ref_riso_all, 100. * oe_rint_all))
    else:
        self.logger.main_log("R factor Rint = %.1f%%" % (100. * oe_rint_all))

    split_sigma_data = self.split_sigma_test(
        selected_uniform[2], selected_uniform[3],
        scale=oe_scale, use_binning=True, show_plot=False)
    split_sigma_data_all = self.split_sigma_test(
        selected_uniform[2], selected_uniform[3],
        scale=oe_scale_all, use_binning=False, show_plot=False)

    self.logger.main_log('')
    if self.params.scaling.model_reindex_op == "h,k,l":
        self.logger.main_log("Table of Scaling Results:")
    else:
        self.logger.main_log(
            "Table of Scaling Results with Model Reindexing as %s:" %
            self.params.scaling.model_reindex_op)

    from libtbx import table_utils
    table_header = ["", "", "", "CC", " N", "CC", " N", "R", "R", "R",
                    "Scale", "Scale", "SpSig"]
    table_header2 = ["Bin", "Resolution Range", "Completeness", "int",
                     "int", "iso", "iso", "int", "split", "iso", "int",
                     "iso", "Test"]
    table_data = []
    table_data.append(table_header)
    table_data.append(table_header2)

    items = binned_cc_int.binner.range_used()

    # XXX Make it clear what the completeness here actually is!
    cumulative_counts_given = 0
    cumulative_counts_complete = 0
    for bin in items:
        table_row = []
        table_row.append("%3d" % bin)
        table_row.append("%-13s" % binned_cc_int.binner.bin_legend(
            i_bin=bin, show_bin_number=False, show_bin_range=False,
            show_d_range=True, show_counts=False))
        table_row.append("%13s" % binned_cc_int.binner.bin_legend(
            i_bin=bin, show_bin_number=False, show_bin_range=False,
            show_d_range=False, show_counts=True))
        cumulative_counts_given += binned_cc_int.binner._counts_given[bin]
        cumulative_counts_complete += \
            binned_cc_int.binner._counts_complete[bin]
        table_row.append("%.1f%%" % (100. * binned_cc_int.data[bin]))
        table_row.append("%7d" % (binned_cc_int_N.data[bin]))

        if have_iso_ref and binned_cc_ref.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * binned_cc_ref.data[bin]))
        else:
            table_row.append("--")

        if have_iso_ref and binned_cc_ref_N.data[bin] is not None:
            table_row.append("%6d" % (binned_cc_ref_N.data[bin]))
        else:
            table_row.append("--")

        if oe_rint.data[bin] is not None:
            table_row.append("%.1f%%" % (100. * oe_rint.data[bin]))
        else:
            table_row.append("--")

        if oe_rsplit.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * oe_rsplit.data[bin]))
        else:
            table_row.append("--")

        if have_iso_ref and ref_riso.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * ref_riso.data[bin]))
        else:
            table_row.append("--")

        if oe_scale.data[bin] is not None:
            table_row.append("%.3f" % oe_scale.data[bin])
        else:
            table_row.append("--")

        if have_iso_ref and ref_scale.data[bin] is not None:
            table_row.append("%.3f" % ref_scale.data[bin])
        else:
            table_row.append("--")

        if split_sigma_data.data[bin] is not None:
            table_row.append("%.4f" % split_sigma_data.data[bin])
        else:
            table_row.append("--")

        table_data.append(table_row)
    table_data.append([""] * len(table_header))

    table_row = [
        format_value("%3s", "All"),
        format_value("%-13s", " "),
        format_value("%13s", "[%d/%d]" % (cumulative_counts_given,
                                          cumulative_counts_complete)),
        format_value("%.1f%%", 100 * corr_int),
        format_value("%7d", N_int)
    ]

    if have_iso_ref:
        table_row.extend((format_value("%.1f%%", 100 * corr_iso),
                          format_value("%6d", N_iso)))
    else:
        table_row.extend(("--", "--"))

    table_row.extend((format_value("%.1f%%", 100 * oe_rint_all),
                      format_value("%.1f%%", 100 * oe_rsplit_all)))

    if have_iso_ref:
        table_row.append(format_value("%.1f%%", 100 * ref_riso_all))
    else:
        table_row.append("--")

    table_row.append(format_value("%.3f", oe_scale_all))

    if have_iso_ref:
        table_row.append(format_value("%.3f", ref_scale_all))
    else:
        table_row.append("--")

    if split_sigma_data_all is not None:
        table_row.append("%.1f" % split_sigma_data_all)
    else:
        table_row.append("--")

    table_data.append(table_row)

    self.logger.main_log(' ')
    self.logger.main_log(table_utils.format(
        table_data, has_header=2, justify='center', delim=" "))
    self.logger.main_log(
        """CCint is the CC-1/2 defined by Diederichs; correlation between
odd/even images. Similarly, Scale int and R int are the scaling factor and
scaling R factor between odd/even images. "iso" columns compare the whole
XFEL dataset to the isomorphous reference.""")

    self.logger.main_log("Niso: result vs. reference common set")

    if have_iso_ref:
        assert N_iso == flex.sum(flex.double(
            [x for x in binned_cc_ref_N.data if x is not None]))
    assert N_int == flex.sum(flex.double(
        [x for x in binned_cc_int_N.data if x is not None]))

    # TODO: how is plotting handled in the new phil design?
    '''
    if params.scaling.show_plots:
        from matplotlib import pyplot as plt
        plt.plot(flex.log(selected_uniform[-2].data()),
                 flex.log(selected_uniform[-1].data()), 'r.')
        plt.show()
        if have_iso_ref:
            plt.plot(flex.log(selected_uniform[0].data()),
                     flex.log(selected_uniform[1].data()), 'r.')
            plt.show()
    '''
    self.logger.main_log(' ')
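
# --- Illustrative sketch (not part of the original source) ---
# run_cc() leans on simple_weighted_correlation(); below is a standalone
# weighted Pearson correlation and least-squares slope in the same spirit
# (zero weights drop invalid reference intensities, as in the callers
# above). This is a sketch, not the library implementation.
def _demo_weighted_correlation(w, x, y):
    sum_w = flex.sum(w)
    sum_wx = flex.sum(w * x)
    sum_wy = flex.sum(w * y)
    sum_wxx = flex.sum(w * x * x)
    sum_wyy = flex.sum(w * y * y)
    sum_wxy = flex.sum(w * x * y)
    # weighted least-squares slope of y on x
    slope = (sum_w * sum_wxy - sum_wx * sum_wy) / (
        sum_w * sum_wxx - sum_wx * sum_wx)
    # weighted Pearson correlation coefficient
    corr = (sum_w * sum_wxy - sum_wx * sum_wy) / math.sqrt(
        (sum_w * sum_wxx - sum_wx * sum_wx) *
        (sum_w * sum_wyy - sum_wy * sum_wy))
    return slope, corr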
def split_sigma_test(self, this, other, scale, use_binning=False,
                     show_plot=False):
    """
    Calculates the split sigma ratio test by Peter Zwart:
    ssr = sum( (Iah - Ibh)^2 ) / sum( sigma_ah^2 + sigma_bh^2 )

    where Iah and Ibh are merged intensities for a given hkl from two halves
    of a dataset (a and b); likewise for sigma_ah and sigma_bh.

    ssr (split sigma ratio) should approximately equal 1 if the errors are
    correctly estimated.
    """
    assert other.size() == this.data().size()
    assert (this.indices() == other.indices()).all_eq(True)
    assert not use_binning or this.binner() is not None

    if use_binning:
        results = []
        for i_bin in this.binner().range_all():
            sel = this.binner().selection(i_bin)
            i_this = this.select(sel)
            i_other = other.select(sel)
            scale_rel = scale.data[i_bin]
            if i_this.size() == 0:
                results.append(None)
            else:
                results.append(
                    self.split_sigma_test(i_this, i_other, scale=scale_rel,
                                          show_plot=show_plot))
        return binned_data(binner=this.binner(), data=results,
                           data_fmt="%7.4f")

    a_data = this.data()
    b_data = scale * other.data()
    a_sigmas = this.sigmas()
    b_sigmas = scale * other.sigmas()

    if show_plot:
        """
        # Diagnostic use of the (I - <I>) / sigma distribution, should have
        # mean=0, std=1
        a_variance = a_sigmas * a_sigmas
        b_variance = b_sigmas * b_sigmas
        mean_num = (a_data / a_variance) + (b_data / b_variance)
        mean_den = (1. / a_variance) + (1. / b_variance)
        mean_values = mean_num / mean_den

        delta_I_a = a_data - mean_values
        normal_a = delta_I_a / a_sigmas
        stats_a = flex.mean_and_variance(normal_a)
        print("\\nA mean %7.4f std %7.4f" % (
            stats_a.mean(), stats_a.unweighted_sample_standard_deviation()))
        order_a = flex.sort_permutation(normal_a)

        delta_I_b = b_data - mean_values
        normal_b = delta_I_b / b_sigmas
        stats_b = flex.mean_and_variance(normal_b)
        print("B mean %7.4f std %7.4f" % (
            stats_b.mean(), stats_b.unweighted_sample_standard_deviation()))
        order_b = flex.sort_permutation(normal_b)

        # plots for debugging
        from matplotlib import pyplot as plt
        plt.plot(range(len(order_a)), normal_a.select(order_a), "b.")
        plt.plot(range(len(order_b)), normal_b.select(order_b), "r.")
        plt.show()
        """
        from cctbx.examples.merging.sigma_correction import ccp4_model
        Correction = ccp4_model()
        Correction.plots(a_data, b_data, a_sigmas, b_sigmas)
        #a_new_variance, b_new_variance = Correction.optimize(a_data, b_data, a_sigmas, b_sigmas)
        #Correction.plots(a_data, b_data, flex.sqrt(a_new_variance), flex.sqrt(b_new_variance))

    n = flex.pow(a_data - b_data, 2)
    d = flex.pow(a_sigmas, 2) + flex.pow(b_sigmas, 2)
    return flex.sum(n) / flex.sum(d)
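
# --- Illustrative sketch (not part of the original source) ---
# Numerical sanity check of the split sigma ratio on synthetic half-dataset
# intensities: with noise drawn at the stated sigma, ssr should come out
# near 1; quoting sigmas half their true size would drive it towards 4.
def _demo_split_sigma_ratio(n=10000, sigma=5.0, seed=0):
    import random
    rng = random.Random(seed)
    true_I = [rng.uniform(50., 500.) for _ in range(n)]
    a = flex.double([I + rng.gauss(0., sigma) for I in true_I])
    b = flex.double([I + rng.gauss(0., sigma) for I in true_I])
    a_sig = flex.double(n, sigma)
    b_sig = flex.double(n, sigma)
    num = flex.pow(a - b, 2)
    den = flex.pow(a_sig, 2) + flex.pow(b_sig, 2)
    return flex.sum(num) / flex.sum(den)  # ~1.0 for well-estimated sigmas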
def estimate_resolution_limit_distl_method2(reflections, imageset,
                                            ice_sel=None, plot_filename=None):
    # Implementation of Method 2 (section 2.4.4) of:
    # Z. Zhang, N. K. Sauter, H. van den Bedem, G. Snell and A. M. Deacon
    # J. Appl. Cryst. (2006). 39, 112-119
    # http://dx.doi.org/10.1107/S0021889805040677

    if ice_sel is None:
        ice_sel = flex.bool(len(reflections), False)

    variances = reflections['intensity.sum.variance']

    sel = variances > 0
    intensities = reflections['intensity.sum.value']
    variances = variances.select(sel)
    ice_sel = ice_sel.select(sel)
    reflections = reflections.select(sel)
    intensities = reflections['intensity.sum.value']

    d_star_sq = flex.pow2(reflections['rlp'].norms())
    d_spacings = uctbx.d_star_sq_as_d(d_star_sq)
    d_star_cubed = flex.pow(reflections['rlp'].norms(), 3)

    binner = binner_d_star_cubed(d_spacings)

    bin_counts = flex.size_t()
    for i_slot, slot in enumerate(binner.bins):
        sel_all = (d_spacings < slot.d_max) & (d_spacings >= slot.d_min)
        #sel = ~(ice_sel) & sel_all
        sel = sel_all
        bin_counts.append(sel.count(True))

    #print(list(bin_counts))
    t0 = (bin_counts[0] + bin_counts[1]) / 2
    mu = 0.15

    for i in range(len(bin_counts) - 1):
        tj = bin_counts[i]
        tj1 = bin_counts[i + 1]
        if (tj < (mu * t0)) and (tj1 < (mu * t0)):
            break
    # capture the cutoff index: the loop variable i is reused by the
    # noisiness loops below, which would otherwise clobber it before the
    # plot is drawn
    i_cutoff = i

    d_min = binner.bins[i_cutoff].d_min

    noisiness = 0
    m = len(bin_counts)
    for i in range(m):
        for j in range(i + 1, m):
            if bin_counts[i] <= bin_counts[j]:
                noisiness += 1
    noisiness /= (0.5 * m * (m - 1))

    if plot_filename is not None:
        if pyplot is None:
            raise Sorry("matplotlib must be installed to generate a plot.")
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(range(len(bin_counts)), bin_counts)
        #ax.set_xlabel('')
        ax.set_ylabel('number of spots in shell')
        xlim = pyplot.xlim()
        ylim = pyplot.ylim()
        ax.vlines(i_cutoff, ylim[0], ylim[1], colors='red')
        pyplot.xlim(0, xlim[1])
        pyplot.ylim(0, ylim[1])
        pyplot.savefig(plot_filename)
        pyplot.close()

    return d_min, noisiness
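
# --- Illustrative sketch (not part of the original source) ---
# The Method 2 cutoff above walks the equal-volume shells from low to high
# resolution and stops when two consecutive shells both drop below mu * t0,
# where t0 averages the two lowest-resolution shell counts.
def _demo_method2_cutoff(bin_counts, mu=0.15):
    t0 = (bin_counts[0] + bin_counts[1]) / 2
    for i in range(len(bin_counts) - 1):
        if bin_counts[i] < mu * t0 and bin_counts[i + 1] < mu * t0:
            return i  # index of the first shell past the resolution limit
    return len(bin_counts) - 1  # never fell off: keep everything

# e.g. _demo_method2_cutoff([100, 90, 70, 40, 12, 9, 8]) -> 4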
def estimate_resolution_limit_distl_method1(reflections, imageset,
                                            ice_sel=None, plot_filename=None):
    # Implementation of Method 1 (section 2.4.4) of:
    # Z. Zhang, N. K. Sauter, H. van den Bedem, G. Snell and A. M. Deacon
    # J. Appl. Cryst. (2006). 39, 112-119
    # http://dx.doi.org/10.1107/S0021889805040677

    if ice_sel is None:
        ice_sel = flex.bool(len(reflections), False)

    variances = reflections['intensity.sum.variance']

    sel = variances > 0
    intensities = reflections['intensity.sum.value']
    variances = variances.select(sel)
    ice_sel = ice_sel.select(sel)
    reflections = reflections.select(sel)
    intensities = reflections['intensity.sum.value']

    d_star_sq = flex.pow2(reflections['rlp'].norms())
    d_spacings = uctbx.d_star_sq_as_d(d_star_sq)
    d_star_cubed = flex.pow(reflections['rlp'].norms(), 3)

    step = 2
    while len(reflections) / step > 40:
        step += 1

    order = flex.sort_permutation(d_spacings, reverse=True)

    ds3_subset = flex.double()
    d_subset = flex.double()
    for i in range(len(reflections) // step):
        ds3_subset.append(d_star_cubed[order[i * step]])
        d_subset.append(d_spacings[order[i * step]])

    x = flex.double(range(len(ds3_subset)))

    # (i) Usually, Pm is the last point, that is, m = n. But m could be
    # smaller than n if an unusually high number of spots are detected
    # around a certain intermediate resolution. In that case, our search for
    # the image resolution does not go outside the spot 'bump'. This is
    # particularly useful when ice-rings are present.
    slopes = (ds3_subset[1:] - ds3_subset[0]) / (x[1:] - x[0])
    skip_first = 3
    p_m = flex.max_index(slopes[skip_first:]) + 1 + skip_first

    # (ii)
    from scitbx import matrix
    x1 = matrix.col((0, ds3_subset[0]))
    x2 = matrix.col((p_m, ds3_subset[p_m]))

    gaps = flex.double([0])
    v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

    for i in range(1, p_m):
        x0 = matrix.col((i, ds3_subset[i]))
        r = x1 - x0
        g = abs(v.dot(r))
        gaps.append(g)

    mv = flex.mean_and_variance(gaps)
    s = mv.unweighted_sample_standard_deviation()

    # (iii)
    p_k = flex.max_index(gaps)
    g_k = gaps[p_k]
    p_g = p_k

    for i in range(p_k + 1, len(gaps)):
        g_i = gaps[i]
        if g_i > (g_k - 0.5 * s):
            p_g = i

    ds3_g = ds3_subset[p_g]
    d_g = d_subset[p_g]

    noisiness = 0
    n = len(ds3_subset)
    for i in range(n - 1):
        for j in range(i + 1, n - 1):
            if slopes[i] >= slopes[j]:
                noisiness += 1
    noisiness /= ((n - 1) * (n - 2) / 2)

    if plot_filename is not None:
        if pyplot is None:
            raise Sorry("matplotlib must be installed to generate a plot.")
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(range(len(ds3_subset)), ds3_subset)
        #ax.set_xlabel('')
        ax.set_ylabel('D^-3')
        xlim = pyplot.xlim()
        ylim = pyplot.ylim()
        ax.vlines(p_g, ylim[0], ylim[1], colors='red')
        pyplot.xlim(0, xlim[1])
        pyplot.ylim(0, ylim[1])
        pyplot.savefig(plot_filename)
        pyplot.close()

    return d_g, noisiness