def exercise_match_multi_indices():
    """Exercise ``miller.match_multi_indices`` on two small index lists.

    ``h0`` holds five distinct Miller indices; ``h1`` holds four indices in
    which (1,2,3) appears twice.  The checks cover matching ``h0`` against
    itself, matching ``h0`` (first argument) against ``h1`` (second
    argument), and the expectation that swapping the arguments raises
    ``RuntimeError``.

    Raises:
        AssertionError: if any of the checked results differ.
        Exception_expected: if the swapped-argument call does not raise.
    """
    h0 = flex.miller_index(((1,2,3), (-1,-2,-3), (2,3,4), (-2,-3,-4), (3,4,5)))
    d0 = flex.double((1,2,3,4,5))  # constructed but not used by the checks
    h1 = flex.miller_index(((1,2,3), (-2,-3,-4), (1,2,3), (2,3,4)))
    d1 = flex.double((10,20,30,40))  # constructed but not used by the checks

    mi = miller.match_multi_indices(h0, h0)
    assert mi.have_singles() == 0
    # zip() is lazy on Python 3, so materialise it before comparing to a list.
    assert list(mi.pairs()) == list(zip(range(5), range(5)))

    mi = miller.match_multi_indices(h0, h1)
    assert tuple(mi.singles(0)) == (1,4,)
    assert tuple(mi.singles(1)) == ()
    assert len(set(mi.pairs()) - set([(0,0), (0,2), (2,3), (3, 1)])) == 0
    assert tuple(mi.number_of_matches(0)) == (2, 0, 1, 1, 0)
    assert tuple(mi.pair_selection(0)) == (1, 0, 1, 1, 0)
    assert tuple(mi.single_selection(0)) == (0, 1, 0, 0, 1)
    assert tuple(mi.number_of_matches(1)) == (1, 1, 1, 1)
    assert tuple(mi.pair_selection(1)) == (1, 1, 1, 1)
    assert tuple(mi.single_selection(1)) == (0, 0, 0, 0)
    assert tuple(mi.paired_miller_indices(0)) \
        == tuple(h0.select(mi.pair_selection(0)))
    l1 = list(mi.paired_miller_indices(1))
    l2 = list(h1.select(mi.pair_selection(1)))
    l1.sort()
    l2.sort()
    assert l1 == l2

    # Presumably rejected because h1 (now the first argument) repeats (1,2,3).
    try:
        miller.match_multi_indices(h1, h0)
    except RuntimeError:
        pass
    else:
        # Without this branch a missing exception would pass silently.
        raise Exception_expected
def exercise_match_multi_indices():
    """Check pairs, singles and selections of match_multi_indices on toy data.

    Matches a five-element index list against itself and against a
    four-element list containing a repeated index, then verifies that the
    call with the two lists swapped raises RuntimeError.
    """
    unique_hkl = flex.miller_index(
        ((1,2,3), (-1,-2,-3), (2,3,4), (-2,-3,-4), (3,4,5)))
    d0 = flex.double((1,2,3,4,5))
    multi_hkl = flex.miller_index(((1,2,3), (-2,-3,-4), (1,2,3), (2,3,4)))
    d1 = flex.double((10,20,30,40))

    # A list matched against itself pairs element i with element i.
    self_match = miller.match_multi_indices(unique_hkl, unique_hkl)
    assert self_match.have_singles() == 0
    assert list(self_match.pairs()) == [(i, i) for i in range(5)]

    cross_match = miller.match_multi_indices(unique_hkl, multi_hkl)
    assert tuple(cross_match.singles(0)) == (1, 4)
    assert tuple(cross_match.singles(1)) == ()
    allowed_pairs = set([(0,0), (0,2), (2,3), (3,1)])
    assert len(set(cross_match.pairs()) - allowed_pairs) == 0

    # (list id, number_of_matches, pair_selection, single_selection)
    expectations = (
        (0, (2, 0, 1, 1, 0), (1, 0, 1, 1, 0), (0, 1, 0, 0, 1)),
        (1, (1, 1, 1, 1), (1, 1, 1, 1), (0, 0, 0, 0)),
    )
    for i_list, n_matches, paired, single in expectations:
        assert tuple(cross_match.number_of_matches(i_list)) == n_matches
        assert tuple(cross_match.pair_selection(i_list)) == paired
        assert tuple(cross_match.single_selection(i_list)) == single

    selected_0 = unique_hkl.select(cross_match.pair_selection(0))
    assert tuple(cross_match.paired_miller_indices(0)) == tuple(selected_0)

    # For the second list only the content is compared, not the order.
    paired_1 = sorted(cross_match.paired_miller_indices(1))
    selected_1 = sorted(multi_hkl.select(cross_match.pair_selection(1)))
    assert paired_1 == selected_1

    try:
        miller.match_multi_indices(multi_hkl, unique_hkl)
    except RuntimeError:
        pass
    else:
        raise Exception_expected
def compute_functional_and_gradients(self):
    """Return the weighted least-squares target and its gradient.

    The scaled observations are merged, and each unmerged observation is
    compared with its batch scale factor times the merged intensity.

    Returns:
        Tuple ``(f, g)``: the summed weighted squared residual and a
        ``flex.double`` of the same size as ``self.x`` holding the
        accumulated derivative for each batch scale factor.
    """
    scaled = self.apply_scales()
    merged = scaled.merge_equivalents().array()
    from cctbx import miller
    matches = miller.match_multi_indices(merged.indices(), scaled.indices())

    functional = 0
    gradients = flex.double(self.x.size())
    for i_obs in range(scaled.size()):
        weight = 1 / scaled.sigmas()[i_obs]
        # Batch numbers are offset by self.minb to index into self.x.
        i_scale = self.batches.data()[i_obs] - self.minb
        scale = self.x[i_scale]
        # NOTE(review): assumes pair i_obs belongs to observation i_obs.
        pair = matches.pairs()[i_obs]
        mean_I = merged.data()[pair[0]]
        # The residual uses the stored, unscaled intensities.
        observed_I = self.unmerged_intensities.data()[i_obs]
        residual = observed_I - scale * mean_I
        functional += weight * residual**2
        gradients[i_scale] += -2 * weight * mean_I * residual
    return functional, gradients
def compute_functional_and_gradients(self):
    """Evaluate the scaling residual and its derivatives w.r.t. ``self.x``.

    Returns:
        Tuple ``(f, g)`` where ``f`` is the sum over observations of
        ``w * (I_unmerged - k * I_merged)**2`` with ``w = 1/sigma`` and
        ``k`` the scale of the observation's batch, and ``g`` is a
        ``flex.double`` with one accumulated derivative per entry of
        ``self.x``.
    """
    unmerged_intensities = self.apply_scales()
    merged_intensities = unmerged_intensities.merge_equivalents().array()
    from cctbx import miller
    mi = miller.match_multi_indices(
        merged_intensities.indices(), unmerged_intensities.indices())

    # Look the arrays up once instead of on every pass through the loop.
    sigmas = unmerged_intensities.sigmas()
    batch_numbers = self.batches.data()
    merged_data = merged_intensities.data()
    raw_data = self.unmerged_intensities.data()
    pairs = mi.pairs()

    f = 0
    g = flex.double(self.x.size())
    for i in range(unmerged_intensities.size()):
        w = 1 / sigmas[i]
        j = batch_numbers[i] - self.minb
        k = self.x[j]
        mean_I = merged_data[pairs[i][0]]
        delta = raw_data[i] - k * mean_I
        f += (w * delta**2)
        g[j] += (-2 * w * mean_I * delta)
    return f, g
def result_for_cxi_merge(self, file_name):
    """Select near-full observations, rescale them and match to the miller set.

    The selection keeps observations whose lorentz_callable value is > 0.2
    for the "rs" algorithm and < 0.9 otherwise.  Data and sigmas of the kept
    observations are divided by the refined scaler.

    Args:
        file_name: Not used by this implementation.

    Returns:
        Tuple ``(observations_original_index, observations, matches)`` where
        ``matches`` is the ``miller.match_multi_indices`` result between
        ``self.miller_set`` and the selected observations.

    Raises:
        ValueError: if fewer than three observations pass the selection.
    """
    scaler = self.refinery.scaler_callable(
        self.parameterization_class(self.MINI.x))
    lorentz = self.refinery.lorentz_callable(
        self.parameterization_class(self.MINI.x))
    if self.params.postrefinement.algorithm == "rs":
        fat_selection = (lorentz > 0.2)
    else:
        fat_selection = (lorentz < 0.9)
    fat_count = fat_selection.count(True)

    # Avoid an empty database INSERT if there are insufficient
    # centrally-located Bragg spots.  In samosa, handle this at a higher
    # level, but handle it somehow.
    if fat_count < 3:
        raise ValueError("< 3 near-fulls after refinement")
    print("On total %5d the fat selection is %5d" % (
        len(self.observations_pair1_selected.indices()), fat_count),
        file=self.out)

    observations_original_index = \
        self.observations_original_index_pair1_selected.select(fat_selection)
    observations = self.observations_pair1_selected.customized_copy(
        indices=self.observations_pair1_selected.indices().select(
            fat_selection),
        data=(self.observations_pair1_selected.data() /
              scaler).select(fat_selection),
        sigmas=(self.observations_pair1_selected.sigmas() /
                scaler).select(fat_selection))
    matches = miller.match_multi_indices(
        miller_indices_unique=self.miller_set.indices(),
        miller_indices=observations.indices())
    return observations_original_index, observations, matches
def result_for_cxi_merge(self, file_name):
    """Select, rescale and match observations after the hybrid refinement.

    Observations are kept when the nave1 refinery's lorentz_callable value
    exceeds ``params.postrefinement.rs_hybrid.partiality_threshold``.  Data
    are divided by the scaler; sigmas by the scaler times the partiality
    raised to ``0.5 * merge_partiality_exponent``.  A weighted correlation
    against ``self.i_model`` is computed, logged to ``self.out`` and stored
    in ``self.final_corr``.

    Args:
        file_name: Echoed in the ``ASTAR_FILE`` log line.

    Returns:
        Tuple ``(observations_original_index, observations, matches)``.

    Raises:
        ValueError: if fewer than three observations pass the selection.
        AssertionError: if the correlation is not > 0.1 or ``values.G`` is
            not < 0.5.
    """
    values = self.get_parameter_values()
    self.rs2_parameter_range_assertions(values)
    scaler = self.nave1_refinery.scaler_callable(
        self.get_parameter_values())
    partiality_array = self.refinery.get_partiality_array(values)
    p_scaler = flex.pow(
        partiality_array,
        0.5 * self.params.postrefinement.merge_partiality_exponent)

    fat_selection = (
        self.nave1_refinery.lorentz_callable(self.get_parameter_values()) >
        self.params.postrefinement.rs_hybrid.partiality_threshold
    )  # was 0.2 for rs2
    fat_count = fat_selection.count(True)
    scaler_s = scaler.select(fat_selection)
    p_scaler_s = p_scaler.select(fat_selection)

    # Avoid an empty database INSERT if there are insufficient
    # centrally-located Bragg spots.  In samosa, handle this at a higher
    # level, but handle it somehow.
    if fat_count < 3:
        raise ValueError("< 3 near-fulls after refinement")
    print("On total %5d the fat selection is %5d" % (
        len(self.observations_pair1_selected.indices()), fat_count),
        file=self.out)

    observations_original_index = \
        self.observations_original_index_pair1_selected.select(fat_selection)
    observations = self.observations_pair1_selected.customized_copy(
        indices=self.observations_pair1_selected.indices().select(
            fat_selection),
        data=(
            self.observations_pair1_selected.data().select(fat_selection) /
            scaler_s),
        sigmas=(
            self.observations_pair1_selected.sigmas().select(fat_selection) /
            (scaler_s * p_scaler_s)))
    matches = miller.match_multi_indices(
        miller_indices_unique=self.miller_set.indices(),
        miller_indices=observations.indices())

    # Model entries with negative sigma get zero weight in the correlation.
    I_weight = flex.double(len(observations.sigmas()), 1.)
    I_reference = flex.double(
        [self.i_model.data()[pair[0]] for pair in matches.pairs()])
    I_invalid = flex.bool(
        [self.i_model.sigmas()[pair[0]] < 0. for pair in matches.pairs()])
    I_weight.set_selected(I_invalid, 0.)
    SWC = simple_weighted_correlation(I_weight, I_reference,
                                      observations.data())
    print("CORR: NEW correlation is", SWC.corr, file=self.out)
    print("ASTAR_FILE", file_name,
          tuple(self.nave1_refinery.get_eff_Astar(values)), file=self.out)
    self.final_corr = SWC.corr

    # another range assertion
    assert self.final_corr > 0.1, "correlation coefficient out of range (<= 0.1) after LevMar refinement"
    # XXX Specific to the hybrid_rs method, and likely these limits are
    # problem-specific (especially G-max) so look for another approach
    # or expose the limits as phil parameters.
    assert values.G < 0.5, "G-scale value out of range ( > 0.5 XXX may be too strict ) after LevMar refinement"
    return observations_original_index, observations, matches
def rmerge_vs_batch(intensities, batches):
    """Compute Rmerge for every batch.

    The intensities are mapped to the asymmetric unit and merged; the
    unmerged observations are then ordered by batch number and paired with
    their merged counterpart.  The per-batch binning itself is delegated to
    ``_batch_bins_and_data``.
    """
    assert intensities.size() == batches.size()
    asu_intensities = intensities.map_to_asu()
    merged_intensities = asu_intensities.merge_equivalents().array()

    batch_order = flex.sort_permutation(batches.data())
    sorted_batches = batches.data().select(batch_order)
    sorted_intensities = asu_intensities.select(batch_order)

    matches = miller.match_multi_indices(merged_intensities.indices(),
                                         sorted_intensities.indices())

    def r_merge_per_batch(pairs):
        """Return sum|I - <I>| / sum(I) over the given (merged, unmerged) pairs."""
        merged_indices, unmerged_indices = zip(*pairs)
        observed = sorted_intensities.data().select(
            flex.size_t(unmerged_indices))
        means = merged_intensities.data().select(flex.size_t(merged_indices))
        numerator = flex.sum(flex.abs(observed - means))
        denominator = flex.sum(observed)
        # A non-positive intensity sum yields 0 rather than a division.
        if not denominator > 0:
            return 0
        return numerator / denominator

    return _batch_bins_and_data(sorted_batches, matches.pairs(),
                                function_to_apply=r_merge_per_batch)
def result_for_cxi_merge(self, file_name):
    """Select near-full observations, rescale them and match to the miller set.

    The selection keeps observations whose lorentz_callable value is > 0.2
    for the "rs" algorithm and < 0.9 otherwise.  Data and sigmas of the kept
    observations are divided by the refined scaler.

    Args:
        file_name: Not used by this implementation.

    Returns:
        Tuple ``(observations_original_index, observations, matches)``.

    Raises:
        ValueError: if fewer than three observations pass the selection.
    """
    scaler = self.refinery.scaler_callable(
        self.parameterization_class(self.MINI.x))
    lorentz = self.refinery.lorentz_callable(
        self.parameterization_class(self.MINI.x))
    if self.params.postrefinement.algorithm == "rs":
        fat_selection = (lorentz > 0.2)
    else:
        fat_selection = (lorentz < 0.9)
    fat_count = fat_selection.count(True)

    # Avoid an empty database INSERT if there are insufficient
    # centrally-located Bragg spots.  In samosa, handle this at a higher
    # level, but handle it somehow.
    if fat_count < 3:
        # Same exception type as before; the message now says why.
        raise ValueError("< 3 near-fulls after refinement")
    print("On total %5d the fat selection is %5d" % (
        len(self.observations_pair1_selected.indices()), fat_count),
        file=self.out)

    observations_original_index = \
        self.observations_original_index_pair1_selected.select(fat_selection)
    observations = self.observations_pair1_selected.customized_copy(
        indices=self.observations_pair1_selected.indices().select(
            fat_selection),
        data=(self.observations_pair1_selected.data() /
              scaler).select(fat_selection),
        sigmas=(self.observations_pair1_selected.sigmas() /
                scaler).select(fat_selection))
    matches = miller.match_multi_indices(
        miller_indices_unique=self.miller_set.indices(),
        miller_indices=observations.indices())
    return observations_original_index, observations, matches
def calculate_cross_correlation(self, miller_array_1, miller_array_2):
    """Accumulate binned cross-correlation sums and reduce them over ranks.

    Returns:
        The table built by ``build_cross_correlation_table`` on rank 0,
        ``None`` on every other rank.
    """
    # Resolution binning objects are pre-created and taken from the params.
    statistics = self.params.statistics
    self.resolution_binner = statistics.resolution_binner
    self.hkl_resolution_bins = statistics.hkl_resolution_bins

    # n_bins_all() is (self.params.statistics.n_bins + 2); the extra 2
    # account for the hkls outside of the binner resolution range.
    n_bins = self.resolution_binner.n_bins_all()

    # Every rank initializes the statistics arrays, even one without any
    # reflections, so that the MPI all-rank reduction can run.
    self.cc_N = flex.int(n_bins, 0)
    self.cc_sum_xx = flex.double(n_bins, 0.0)
    self.cc_sum_xy = flex.double(n_bins, 0.0)
    self.cc_sum_yy = flex.double(n_bins, 0.0)
    self.cc_sum_x = flex.double(n_bins, 0.0)
    self.cc_sum_y = flex.double(n_bins, 0.0)

    indices_1 = miller_array_1.indices()
    indices_2 = miller_array_2.indices()
    matching_indices = miller.match_multi_indices(
        miller_indices_unique=indices_1, miller_indices=indices_2)

    # Binned summations of all terms of the cross-correlation formula.
    data_1 = miller_array_1.data()
    data_2 = miller_array_2.data()
    bin_of_hkl = self.hkl_resolution_bins
    for i_1, i_2 in matching_indices.pairs():
        hkl = indices_1[i_1]
        assert hkl == indices_2[i_2]
        if hkl not in bin_of_hkl:
            continue
        i_bin = bin_of_hkl[hkl]
        I_x = data_1[i_1]
        I_y = data_2[i_2]
        self.cc_N[i_bin] += 1
        self.cc_sum_xx[i_bin] += I_x**2
        self.cc_sum_yy[i_bin] += I_y**2
        self.cc_sum_xy[i_bin] += I_x * I_y
        self.cc_sum_x[i_bin] += I_x
        self.cc_sum_y[i_bin] += I_y

    # Accumulate the binned counts and sums from all ranks.
    accumulate = self.mpi_helper.cumulative_flex
    all_ranks_cc_N = accumulate(self.cc_N, flex.int)
    all_ranks_cc_sum_xx = accumulate(self.cc_sum_xx, flex.double)
    all_ranks_cc_sum_yy = accumulate(self.cc_sum_yy, flex.double)
    all_ranks_cc_sum_xy = accumulate(self.cc_sum_xy, flex.double)
    all_ranks_cc_sum_x = accumulate(self.cc_sum_x, flex.double)
    all_ranks_cc_sum_y = accumulate(self.cc_sum_y, flex.double)

    # Only rank 0 builds the final table.
    if self.mpi_helper.rank != 0:
        return None
    return self.build_cross_correlation_table(
        all_ranks_cc_N, all_ranks_cc_sum_xx, all_ranks_cc_sum_yy,
        all_ranks_cc_sum_xy, all_ranks_cc_sum_x, all_ranks_cc_sum_y)
def result_for_cxi_merge(self, file_name):
    """Select, rescale and match observations after rs2 refinement.

    Observations with partiality > 0.2 are kept.  Data are divided by the
    scaler; sigmas by the scaler times the partiality raised to
    ``0.5 * merge_partiality_exponent``.  The weighted correlation against
    ``self.i_model`` is written to ``self.out`` and stored in
    ``self.final_corr``; ``self.refined_mini`` is set to ``self.MINI``.

    Returns:
        Tuple ``(observations_original_index, observations, matches)``.

    Raises:
        ValueError: if fewer than three observations pass the selection.
        AssertionError: if the final correlation is not > 0.1.
    """
    values = self.get_parameter_values()
    self.rs2_parameter_range_assertions(values)
    scaler = self.refinery.scaler_callable(
        self.parameterization_class(self.MINI.x))
    partiality_array = self.refinery.get_partiality_array(values)
    exponent = 0.5 * self.params.postrefinement.merge_partiality_exponent
    p_scaler = flex.pow(partiality_array, exponent)

    fat_selection = (partiality_array > 0.2)
    fat_count = fat_selection.count(True)
    scaler_s = scaler.select(fat_selection)
    p_scaler_s = p_scaler.select(fat_selection)

    # Avoid an empty database INSERT if there are insufficient
    # centrally-located Bragg spots.  In samosa, handle this at a higher
    # level, but handle it somehow.
    if fat_count < 3:
        raise ValueError("< 3 near-fulls after refinement")
    n_total = len(self.observations_pair1_selected.indices())
    print("On total %5d the fat selection is %5d" % (n_total, fat_count),
          file=self.out)

    observations_original_index = \
        self.observations_original_index_pair1_selected.select(fat_selection)
    pair1 = self.observations_pair1_selected
    fat_indices = pair1.indices().select(fat_selection)
    fat_data = pair1.data().select(fat_selection) / scaler_s
    fat_sigmas = pair1.sigmas().select(fat_selection) / (scaler_s * p_scaler_s)
    observations = pair1.customized_copy(
        indices=fat_indices, data=fat_data, sigmas=fat_sigmas)

    matches = miller.match_multi_indices(
        miller_indices_unique=self.miller_set.indices(),
        miller_indices=observations.indices())

    # Model entries with negative sigma get zero weight in the correlation.
    I_weight = flex.double(len(observations.sigmas()), 1.)
    model_rows = [pair[0] for pair in matches.pairs()]
    I_reference = flex.double(
        [self.i_model.data()[row] for row in model_rows])
    I_invalid = flex.bool(
        [self.i_model.sigmas()[row] < 0. for row in model_rows])
    I_weight.set_selected(I_invalid, 0.)
    SWC = simple_weighted_correlation(I_weight, I_reference,
                                      observations.data())

    print("CORR: NEW correlation is", SWC.corr, file=self.out)
    effective_astar = tuple(self.refinery.get_eff_Astar(values))
    print("ASTAR_FILE", file_name, effective_astar, file=self.out)
    self.final_corr = SWC.corr
    self.refined_mini = self.MINI

    # another range assertion
    assert self.final_corr > 0.1, "correlation coefficient out of range (<= 0.1) after rs2 refinement"
    return observations_original_index, observations, matches
def reduce_by_miller_index(self, miller_indices):
    """Keep only the entries whose merge index matches ``miller_indices``.

    The positions in ``self.miller_indices_merge`` that pair with an entry
    of ``miller_indices`` are collected and passed to
    ``reduce_by_selection``.
    """
    sequences = range(self.get_size())
    matched_pairs = miller.match_multi_indices(
        miller_indices_unique=miller_indices,
        miller_indices=self.miller_indices_merge).pairs()
    # Second pair member: position within self.miller_indices_merge.
    positions = flex.int([pair[1] for pair in matched_pairs])
    sequences_bin = flex.size_t(
        [sequences[position] for position in positions])
    self.reduce_by_selection(sequences_bin)
def result_for_cxi_merge(self):
    """Select near-full observations, rescale them and match to the miller set.

    The selection keeps observations whose lorentz_callable value is > 0.2
    for the "rs" algorithm and < 0.9 otherwise.  Data and sigmas of the kept
    observations are divided by the refined scaler.  Progress messages are
    sent to ``self.logger`` when ``params.output.log_level`` is 0.

    Returns:
        Tuple ``(observations_original_index, observations, matches)`` where
        ``matches`` pairs ``params.scaling.miller_set`` with the selected
        observations.

    Raises:
        ValueError: if fewer than three observations pass the selection.
    """
    scaler = self.refinery.scaler_callable(
        self.parameterization_class(self.MINI.x))
    # Evaluated once; the earlier second evaluation only fed disabled
    # debugging code.
    lorentz = self.refinery.lorentz_callable(
        self.parameterization_class(self.MINI.x))
    if self.params.postrefinement.algorithm == "rs":
        fat_selection = (lorentz > 0.2)
    else:
        fat_selection = (lorentz < 0.9)
    fat_count = fat_selection.count(True)
    verbose = self.params.output.log_level == 0

    # reject an experiment with insufficient number of near-full reflections
    if fat_count < 3:
        if verbose:
            self.logger.log(
                "Rejected experiment, because: On total %5d the fat selection is %5d"
                % (len(self.observations_pair1_selected.indices()), fat_count))
        raise ValueError("< 3 near-fulls after refinement")

    if verbose:
        self.logger.log(
            "On total %5d the fat selection is %5d" %
            (len(self.observations_pair1_selected.indices()), fat_count))

    observations_original_index = \
        self.observations_original_index_pair1_selected.select(fat_selection)
    observations = self.observations_pair1_selected.customized_copy(
        indices=self.observations_pair1_selected.indices().select(
            fat_selection),
        data=(self.observations_pair1_selected.data() /
              scaler).select(fat_selection),
        sigmas=(self.observations_pair1_selected.sigmas() /
                scaler).select(fat_selection))
    matches = miller.match_multi_indices(
        miller_indices_unique=self.params.scaling.miller_set.indices(),
        miller_indices=observations.indices())
    return observations_original_index, observations, matches
def result_for_cxi_merge(self):
    """Select, rescale and match observations after rs2 refinement.

    Observations with partiality > 0.2 are kept.  Data are divided by the
    scaler; sigmas by the scaler times the partiality raised to
    ``0.5 * merge_partiality_exponent``.  The weighted correlation against
    ``params.scaling.i_model`` is stored in ``self.final_corr`` and
    ``self.refined_mini`` is set to ``self.MINI``.  Messages go to
    ``self.logger`` when ``params.output.log_level`` is 0.

    Returns:
        Tuple ``(observations_original_index, observations, matches)``.

    Raises:
        ValueError: if fewer than three observations pass the selection.
        AssertionError: if the final correlation is not > 0.1.
    """
    values = self.get_parameter_values()
    self.rs2_parameter_range_assertions(values)
    scaler = self.refinery.scaler_callable(
        self.parameterization_class(self.MINI.x))
    partiality_array = self.refinery.get_partiality_array(values)
    exponent = 0.5 * self.params.postrefinement.merge_partiality_exponent
    p_scaler = flex.pow(partiality_array, exponent)

    fat_selection = (partiality_array > 0.2)
    fat_count = fat_selection.count(True)
    scaler_s = scaler.select(fat_selection)
    p_scaler_s = p_scaler.select(fat_selection)

    # reject an experiment with insufficient number of near-full reflections
    if fat_count < 3:
        if self.params.output.log_level == 0:
            n_total = len(self.observations_pair1_selected.indices())
            self.logger.log(
                "Rejected experiment, because: On total %5d the fat selection is %5d"
                % (n_total, fat_count))
        raise ValueError("< 3 near-fulls after refinement")

    if self.params.output.log_level == 0:
        n_total = len(self.observations_pair1_selected.indices())
        self.logger.log(
            "On total %5d the fat selection is %5d" % (n_total, fat_count))

    observations_original_index = \
        self.observations_original_index_pair1_selected.select(fat_selection)
    pair1 = self.observations_pair1_selected
    fat_indices = pair1.indices().select(fat_selection)
    fat_data = pair1.data().select(fat_selection) / scaler_s
    fat_sigmas = pair1.sigmas().select(fat_selection) / (scaler_s * p_scaler_s)
    observations = pair1.customized_copy(
        indices=fat_indices, data=fat_data, sigmas=fat_sigmas)

    matches = miller.match_multi_indices(
        miller_indices_unique=self.params.scaling.miller_set.indices(),
        miller_indices=observations.indices())

    # Model entries with negative sigma get zero weight in the correlation.
    I_weight = flex.double(len(observations.sigmas()), 1.)
    model_rows = [pair[0] for pair in matches.pairs()]
    I_reference = flex.double(
        [self.params.scaling.i_model.data()[row] for row in model_rows])
    I_invalid = flex.bool(
        [self.params.scaling.i_model.sigmas()[row] < 0. for row in model_rows])
    I_weight.set_selected(I_invalid, 0.)
    SWC = simple_weighted_correlation(I_weight, I_reference,
                                      observations.data())

    if self.params.output.log_level == 0:
        self.logger.log("CORR: NEW correlation is: %f" % SWC.corr)
        self.logger.log("ASTAR: ")
        self.logger.log(tuple(self.refinery.get_eff_Astar(values)))
    self.final_corr = SWC.corr
    self.refined_mini = self.MINI

    # another range assertion
    assert self.final_corr > 0.1, "correlation coefficient out of range (<= 0.1) after rs2 refinement"
    return observations_original_index, observations, matches
def rmerge_vs_batch(intensities, batches):
    """Compute Rmerge for each batch and return it as batch_binned_data.

    The observations are mapped to the asymmetric unit, merged, and sorted
    by batch number.  For every run of equal batch numbers the value
    sum|I - <I>| / sum(I) is recorded, or 0 when the sum of intensities is
    not positive.
    """
    assert intensities.size() == batches.size()
    intensities = intensities.map_to_asu()
    merged_intensities = intensities.merge_equivalents().array()

    order = flex.sort_permutation(batches.data())
    batches = batches.data().select(order)
    intensities = intensities.select(order)

    from cctbx import miller
    pairs = miller.match_multi_indices(
        merged_intensities.indices(), intensities.indices()).pairs()

    unmerged_data = intensities.data()
    merged_data = merged_intensities.data()

    bins = []
    data = []
    i_batch_start = 0
    current_batch = flex.min(batches)
    n_ref = batches.size()
    # One extra pass (i_ref == n_ref) flushes the final batch.
    for i_ref in range(n_ref + 1):
        if i_ref != n_ref and batches[i_ref] == current_batch:
            continue
        assert batches[i_batch_start:i_ref].all_eq(current_batch)
        batch_pairs = pairs[i_batch_start:i_ref]
        numerator = sum(
            abs(unmerged_data[p[1]] - merged_data[p[0]]) for p in batch_pairs)
        denominator = sum(unmerged_data[p[1]] for p in batch_pairs)
        bins.append(current_batch)
        data.append(numerator / denominator if denominator > 0 else 0)
        i_batch_start = i_ref
        if i_ref < n_ref:
            current_batch = batches[i_batch_start]
    return batch_binned_data(bins, data)
def get_cciso(self, miller_array_iso):
    """Correlate the merged intensities with an isomorphous reference.

    Returns:
        Tuple ``(cciso, n_refl_cciso)``: the linear correlation coefficient
        over matched reflections and the number of matched pairs.  Both are
        0 when ``miller_array_iso`` is falsy; the coefficient stays 0 when
        nothing matches.
    """
    cciso, n_refl_cciso = (0, 0)
    if not miller_array_iso:
        return cciso, n_refl_cciso

    pairs = miller.match_multi_indices(
        miller_indices_unique=miller_array_iso.indices(),
        miller_indices=self.miller_array_merge.indices()).pairs()
    iso_data = miller_array_iso.data()
    I_iso = flex.double([iso_data[pair[0]] for pair in pairs])
    I_merge_match_iso = flex.double([self.I_merge[pair[1]] for pair in pairs])

    n_refl_cciso = len(pairs)
    if n_refl_cciso > 0:
        cciso = flex.linear_correlation(I_merge_match_iso,
                                        I_iso).coefficient()
    return cciso, n_refl_cciso
def result_for_cxi_merge(self, file_name):
    """Select, rescale and match observations after the hybrid refinement.

    Observations are kept when the nave1 refinery's lorentz_callable value
    exceeds ``params.postrefinement.rs_hybrid.partiality_threshold``.  Data
    are divided by the scaler; sigmas by the scaler times the partiality
    raised to ``0.5 * merge_partiality_exponent``.  A unit-weighted
    correlation against ``self.i_model`` is written to ``self.out`` and
    stored in ``self.final_corr``.

    Args:
        file_name: Not used by this implementation.

    Returns:
        Tuple ``(observations_original_index, observations, matches)``.

    Raises:
        ValueError: if fewer than three observations pass the selection.
        AssertionError: if the correlation is not > 0.1 or ``values.G`` is
            not < 0.5.
    """
    values = self.get_parameter_values()
    self.rs2_parameter_range_assertions(values)
    scaler = self.nave1_refinery.scaler_callable(self.get_parameter_values())
    partiality_array = self.refinery.get_partiality_array(values)
    p_scaler = flex.pow(
        partiality_array,
        0.5 * self.params.postrefinement.merge_partiality_exponent)

    fat_selection = (
        self.nave1_refinery.lorentz_callable(self.get_parameter_values()) >
        self.params.postrefinement.rs_hybrid.partiality_threshold
    )  # was 0.2 for rs2
    fat_count = fat_selection.count(True)
    scaler_s = scaler.select(fat_selection)
    p_scaler_s = p_scaler.select(fat_selection)

    # Avoid an empty database INSERT if there are insufficient
    # centrally-located Bragg spots.  In samosa, handle this at a higher
    # level, but handle it somehow.
    if fat_count < 3:
        raise ValueError("< 3 near-fulls after refinement")
    print("On total %5d the fat selection is %5d" % (
        len(self.observations_pair1_selected.indices()), fat_count),
        file=self.out)

    observations_original_index = \
        self.observations_original_index_pair1_selected.select(fat_selection)
    observations = self.observations_pair1_selected.customized_copy(
        indices=self.observations_pair1_selected.indices().select(
            fat_selection),
        data=(
            self.observations_pair1_selected.data().select(fat_selection) /
            scaler_s),
        sigmas=(
            self.observations_pair1_selected.sigmas().select(fat_selection) /
            (scaler_s * p_scaler_s)))
    matches = miller.match_multi_indices(
        miller_indices_unique=self.miller_set.indices(),
        miller_indices=observations.indices())

    I_weight = flex.double(len(observations.sigmas()), 1.)
    I_reference = flex.double(
        [self.i_model.data()[pair[0]] for pair in matches.pairs()])
    SWC = simple_weighted_correlation(I_weight, I_reference,
                                      observations.data())
    print("CORR: NEW correlation is", SWC.corr, file=self.out)
    self.final_corr = SWC.corr

    # another range assertion
    assert self.final_corr > 0.1, "correlation coefficient out of range (<= 0.1) after LevMar refinement"
    # XXX Specific to the hybrid_rs method, and likely these limits are
    # problem-specific (especially G-max) so look for another approach
    # or expose the limits as phil parameters.
    assert values.G < 0.5, "G-scale value out of range ( > 0.5 XXX may be too strict ) after LevMar refinement"
    return observations_original_index, observations, matches
def calc_cc(miller_array_ref, miller_array_obs):
    '''
    Correlate the intensities of reflections present in both arrays.

    Returns (cc, n_refl): the Pearson correlation coefficient of the matched
    intensities (0 when it is NaN or nothing matches) and the number of
    matched pairs.
    '''
    pairs = miller.match_multi_indices(
        miller_indices_unique=miller_array_ref.indices(),
        miller_indices=miller_array_obs.indices()).pairs()
    ref_data = miller_array_ref.data()
    obs_data = miller_array_obs.data()
    I_ref = flex.double([ref_data[pair[0]] for pair in pairs])
    I_o = flex.double([obs_data[pair[1]] for pair in pairs])

    n_refl = len(pairs)
    if n_refl == 0:
        return 0, 0
    cc = np.corrcoef(I_o, I_ref)[0, 1]
    if math.isnan(cc):
        cc = 0
    return cc, n_refl
def get_correlation(cb_op=None):
    """Return the CC of the experiment to the reference for an optional operator.

    When ``cb_op`` is given, the experiment intensities are re-indexed with
    it and mapped to the asymmetric unit before being fitted to the model.
    A missing correlation is reported as -1.
    """
    # Miller array for the experiment reflections
    exp_set = miller.set(
        target_symm, exp_reflections['miller_index_asymmetric'], True)
    exp_intensities = miller.array(
        exp_set,
        exp_reflections['intensity.sum.value'],
        flex.sqrt(exp_reflections['intensity.sum.variance']))
    if cb_op:
        exp_intensities = exp_intensities.change_basis(cb_op).map_to_asu()

    # Pair the model HKLs with the experiment HKLs
    matching_indices = miller.match_multi_indices(
        miller_indices_unique=model_intensities.indices(),
        miller_indices=exp_intensities.indices())

    # Least squares
    scaling_result = scaler.fit_experiment_to_reference(
        model_intensities, exp_intensities, matching_indices)
    if scaling_result.correlation is None:
        return -1
    return scaling_result.correlation
def main(pr_pickle_file):
    """Print the spread of repeated observations in a post-refinement pickle.

    Observations are corrected to full intensities by dividing by their
    partiality and grouped by Miller index.  For groups holding more than
    one observation the percentage ``100 * sum|I - <I>| / sum|I|`` is
    printed next to the pickle file name.  Nothing is printed when the
    pickle holds ``None`` or no such group exists.

    Args:
        pr_pickle_file: Path of the pickled post-refinement result.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only use this on
    # trusted files.
    with open(pr_pickle_file, 'rb') as pickle_file:
        pres = pickle.load(pickle_file)
    if pres is None:
        return

    # sort according to indices
    perm = pres.observations.sort_permutation(by_value="packed_indices")
    obs_asu = pres.observations.select(perm)
    partiality = pres.partiality.select(perm)

    # correct to full reflections
    obs_asu = obs_asu.customized_copy(data=obs_asu.data() / partiality,
                                      sigmas=obs_asu.sigmas() / partiality)

    # group by similar indices
    obs_uniq = obs_asu.merge_equivalents().array()
    pairs = miller.match_multi_indices(
        miller_indices_unique=obs_uniq.indices(),
        miller_indices=obs_asu.indices()).pairs()
    pair_0 = flex.int([pair[0] for pair in pairs])
    pair_1 = flex.int([pair[1] for pair in pairs])
    group_id_list = flex.int([pair_0[j] for j in pair_1])
    tally = Counter(group_id_list)

    # select only tokens with count > 1
    poly_tokens = [k for k, v in tally.items() if v > 1]

    delta_I = flex.double()
    obs_I = flex.double()
    for token in poly_tokens:
        obs_group = obs_asu.select(
            obs_asu.indices() == obs_uniq.indices()[token])
        I_avg = flex.mean(obs_group.data())
        delta_I.extend(
            flex.double([abs(I - I_avg) for I in obs_group.data()]))
        obs_I.extend(obs_group.data())

    if len(obs_I) > 0:
        print(pres.pickle_filename,
              '%6.2f' % (flex.sum(delta_I) * 100 / flex.sum(flex.abs(obs_I))))
for i, j in zip(i_model.indices(),i_model.data()): print>>write_I, j count=count+1 print "Total number of miller indecis: ", count write_I.close() random.seed(0) frame=0 write_G=open("G_reference_"+str(n_frames)+".db","w+") write=open("PSI_simulated_observations_"+str(n_frames)+".db","w+") for files in os.listdir("/reg/neh/home/mamin03/PSI/integration"): print "Frame number ", frame if frame==n_frames: break d=easy_pickle.load("/reg/neh/home/mamin03/PSI/integration/"+files) A=d['observations'][0].as_non_anomalous_array().map_to_asu() matches = miller.match_multi_indices( miller_indices_unique=i_model.indices(), miller_indices=A.indices()) scale=random.random()*10 #scale factor for each frame (simulated G) print>>write_G, scale for pair in matches.pairs(): # print pair[0] , i_model.indices()[pair[0]], A.indices()[pair[1]], scale*i_model.data()[pair[0]], i_model.sigmas()[pair[0]] scaled_I=scale*i_model.data()[pair[0]] sigma=math.sqrt(scaled_I) #scaled_I=scaled_I+random.normalvariate(0, sigma) print>> write, pair[0], scaled_I, sigma, 0, 0, frame frame=frame+1 write.close() write_G.close() datafile="PSI_simulated_observations_"+str(n_frames)+".db" with open(datafile) as f:
def postrefine_by_frame(self, frame_no, pickle_filename, iparams, miller_array_ref, pres_in, avg_mode):
    """Post-refine a single frame against a reference intensity set.

    Reads the frame, keeps the observations that match ``miller_array_ref``,
    runs the least-squares optimisation, recomputes partialities with the
    refined parameters for all observations of the frame, and packs the
    reflections above ``iparams.merge.partiality_min`` into a
    ``postref_results`` object.

    Args:
        frame_no: Frame number stored in the result.
        pickle_filename: Path of the frame file; split on '/' to get the
            image name used in the messages.
        iparams: Parameter object forwarded to the helpers.
        miller_array_ref: Reference array the observations are matched to.
        pres_in: Optional earlier result; when not None its
            ``crystal_orientation`` is used for the final partiality and
            orientation calculation.
        avg_mode: Forwarded to ``organize_input``.

    Returns:
        ``(pres, txt_postref)`` on success, or ``(None, txt_exception)``
        when the frame cannot be read, ``organize_input`` yields no inputs,
        or the optimisation raises.
    """
    #1. Prepare data
    observations_pickle = read_frame(pickle_filename)
    pickle_filepaths = pickle_filename.split('/')
    img_filename_only = pickle_filepaths[len(pickle_filepaths) - 1]
    txt_exception = ' {0:40} ==> '.format(img_filename_only)
    if observations_pickle is None:
        txt_exception += 'empty or bad input file\n'
        return None, txt_exception
    inputs, txt_organize_input = self.organize_input(
        observations_pickle,
        iparams,
        avg_mode,
        pickle_filename=pickle_filename)
    if inputs is not None:
        observations_original, alpha_angle, spot_pred_x_mm, spot_pred_y_mm, detector_distance_mm, wavelength, crystal_init_orientation = inputs
    else:
        txt_exception += txt_organize_input + '\n'
        return None, txt_exception
    #2. Select data for post-refinement (only select indices that are common
    #   with the reference set)
    observations_non_polar, index_basis_name = self.get_observations_non_polar(
        observations_original, pickle_filename, iparams)
    matches = miller.match_multi_indices(
        miller_indices_unique=miller_array_ref.indices(),
        miller_indices=observations_non_polar.indices())
    # pair[0] indexes the reference array, pair[1] the observations.
    pair_0 = flex.size_t([pair[0] for pair in matches.pairs()])
    pair_1 = flex.size_t([pair[1] for pair in matches.pairs()])
    references_sel = miller_array_ref.select(pair_0)
    observations_original_sel = observations_original.select(pair_1)
    observations_non_polar_sel = observations_non_polar.select(pair_1)
    alpha_angle_set = alpha_angle.select(pair_1)
    spot_pred_x_mm_set = spot_pred_x_mm.select(pair_1)
    spot_pred_y_mm_set = spot_pred_y_mm.select(pair_1)
    #4. Do least-squares refinement
    lsqrh = leastsqr_handler()
    try:
        refined_params, stats, n_refl_postrefined = lsqrh.optimize(
            references_sel.data(), observations_original_sel, wavelength,
            crystal_init_orientation, alpha_angle_set, spot_pred_x_mm_set,
            spot_pred_y_mm_set, iparams, pres_in, observations_non_polar_sel,
            detector_distance_mm)
    except Exception:
        # Any failure inside the optimiser is reported as a rejected frame.
        txt_exception += 'optimization failed.\n'
        return None, txt_exception
    #calculate partiality for output (with target_anomalous check)
    G_fin, B_fin, rotx_fin, roty_fin, ry_fin, rz_fin, r0_fin, re_fin, voigt_nu_fin, \
        a_fin, b_fin, c_fin, alpha_fin, beta_fin, gamma_fin = refined_params
    # The inputs are rebuilt so that the following steps work on all
    # observations of the frame, not only those matched to the reference.
    # NOTE(review): unlike the first call, the result is unpacked without a
    # None check.
    inputs, txt_organize_input = self.organize_input(
        observations_pickle,
        iparams,
        avg_mode,
        pickle_filename=pickle_filename)
    observations_original, alpha_angle, spot_pred_x_mm, spot_pred_y_mm, detector_distance_mm, wavelength, crystal_init_orientation = inputs
    observations_non_polar, index_basis_name = self.get_observations_non_polar(
        observations_original, pickle_filename, iparams)
    from cctbx.uctbx import unit_cell
    uc_fin = unit_cell(
        (a_fin, b_fin, c_fin, alpha_fin, beta_fin, gamma_fin))
    if pres_in is not None:
        crystal_init_orientation = pres_in.crystal_orientation
    two_theta = observations_original.two_theta(
        wavelength=wavelength).data()
    ph = partiality_handler()
    partiality_fin, dummy, rs_fin, rh_fin = ph.calc_partiality_anisotropy_set(
        uc_fin, rotx_fin, roty_fin, observations_original.indices(),
        ry_fin, rz_fin, r0_fin, re_fin, voigt_nu_fin, two_theta,
        alpha_angle, wavelength, crystal_init_orientation, spot_pred_x_mm,
        spot_pred_y_mm, detector_distance_mm, iparams.partiality_model,
        iparams.flag_beam_divergence)
    #calculate the new crystal orientation
    O = sqr(uc_fin.orthogonalization_matrix()).transpose()
    R = sqr(crystal_init_orientation.crystal_rotation_matrix()).transpose()
    from cctbx.crystal_orientation import crystal_orientation, basis_type
    CO = crystal_orientation(O * R, basis_type.direct)
    crystal_fin_orientation = CO.rotate_thru(
        (1, 0, 0), rotx_fin).rotate_thru((0, 1, 0), roty_fin)
    #remove reflections with partiality below threshold
    i_sel = partiality_fin > iparams.merge.partiality_min
    partiality_fin_sel = partiality_fin.select(i_sel)
    rs_fin_sel = rs_fin.select(i_sel)
    rh_fin_sel = rh_fin.select(i_sel)
    observations_non_polar_sel = observations_non_polar.customized_copy(\
        indices=observations_non_polar.indices().select(i_sel),
        data=observations_non_polar.data().select(i_sel),
        sigmas=observations_non_polar.sigmas().select(i_sel))
    observations_original_sel = observations_original.customized_copy(\
        indices=observations_original.indices().select(i_sel),
        data=observations_original.data().select(i_sel),
        sigmas=observations_original.sigmas().select(i_sel))
    pres = postref_results()
    pres.set_params(observations=observations_non_polar_sel,
                    observations_original=observations_original_sel,
                    refined_params=refined_params,
                    stats=stats,
                    partiality=partiality_fin_sel,
                    rs_set=rs_fin_sel,
                    rh_set=rh_fin_sel,
                    frame_no=frame_no,
                    pickle_filename=pickle_filename,
                    wavelength=wavelength,
                    crystal_orientation=crystal_fin_orientation,
                    detector_distance_mm=detector_distance_mm)
    # Relative changes in percent.
    # NOTE(review): each division raises if the initial value is zero.
    r_change = ((pres.R_final - pres.R_init) / pres.R_init) * 100
    r_xy_change = (
        (pres.R_xy_final - pres.R_xy_init) / pres.R_xy_init) * 100
    cc_change = ((pres.CC_final - pres.CC_init) / pres.CC_init) * 100
    txt_postref = '{0:40} => RES:{1:5.2f} NREFL:{2:5d} R:{3:6.1f}% RXY:{4:5.1f}% CC:{5:5.1f}% G:{6:6.4f} B:{7:5.1f} CELL:{8:6.1f}{9:6.1f} {10:6.1f} {11:5.1f} {12:5.1f} {13:5.1f}'.format(
        img_filename_only + ' (' + index_basis_name + ')',
        observations_original_sel.d_min(),
        len(observations_original_sel.data()), r_change, r_xy_change,
        cc_change, pres.G, pres.B, a_fin, b_fin, c_fin, alpha_fin,
        beta_fin, gamma_fin)
    print(txt_postref)
    txt_postref += '\n'
    return pres, txt_postref
def common_filter(self, observations_hdlr_other):
    """Restrict this handler and another one to the reflections they share.

    The work is done on two new ``observations_handler`` objects that are
    populated with ``copy_from`` (one from ``self``, one from
    ``observations_hdlr_other``); those two objects are what is returned.

    Parameters:
      observations_hdlr_other: the handler to intersect with.

    Returns:
      ``(obs_hdlr, obs_hdlr_other)`` whose ``observations`` are the common
      sets of the two inputs and whose ``observations_original`` carry the
      matching original Miller indices.  When ``partiality`` is set on the
      first handler, the per-reflection arrays listed below are reduced to
      the common set as well.
    """
    obs_hdlr = observations_handler(self.iparams)
    obs_hdlr.copy_from(self)
    obs_hdlr_other = observations_handler(self.iparams)
    obs_hdlr_other.copy_from(observations_hdlr_other)

    obs_match, obs_other_match = obs_hdlr.observations.common_sets(
        obs_hdlr_other.observations, assert_is_similar_symmetry=False)

    # Look up, for every matched reflection, the index it had before the
    # observations were reindexed (observations_original).
    matches = miller.match_multi_indices(
        miller_indices_unique=obs_hdlr.observations.indices(),
        miller_indices=obs_match.indices())
    original_indices = obs_hdlr.observations_original.indices()
    miller_indices_original = flex.miller_index(
        [original_indices[pair[0]] for pair in matches.pairs()])

    matches = miller.match_multi_indices(
        miller_indices_unique=obs_hdlr_other.observations.indices(),
        miller_indices=obs_match.indices())
    original_indices_other = obs_hdlr_other.observations_original.indices()
    miller_indices_original_other = flex.miller_index(
        [original_indices_other[pair[0]] for pair in matches.pairs()])

    obs_original_match = obs_match.customized_copy(
        indices=miller_indices_original)
    obs_original_other_match = obs_other_match.customized_copy(
        indices=miller_indices_original_other)

    # Per-reflection arrays that have to follow the observations.  They are
    # handled in one loop so that every array is intersected with its own
    # counterpart from the other handler (the hand-written version paired
    # bragg_angle_set with itself).
    per_reflection_attrs = (
        "partiality",
        "rs_set",
        "rh_set",
        "bragg_angle_set",
        "alpha_angle_set",
        "spot_pred_x_mm_set",
        "spot_pred_y_mm_set",
    )
    extra = {}
    extra_other = {}
    if obs_hdlr.partiality is not None:
        for attr in per_reflection_attrs:
            # Wrap the raw array in a miller array so common_sets() can
            # select the shared reflections for us.
            ma = obs_hdlr.observations.customized_copy(
                data=getattr(obs_hdlr, attr))
            ma_other = obs_hdlr_other.observations.customized_copy(
                data=getattr(obs_hdlr_other, attr))
            ma, ma_other = ma.common_sets(
                ma_other, assert_is_similar_symmetry=False)
            extra[attr] = ma.data()
            extra_other[attr] = ma_other.data()

    obs_hdlr.set_params(
        observations=obs_match,
        observations_original=obs_original_match,
        **extra)
    obs_hdlr_other.set_params(
        observations=obs_other_match,
        observations_original=obs_original_other_match,
        **extra_other)
    return obs_hdlr, obs_hdlr_other
def __init__(self, datadir, work_params, plot=False, esd_plot=False, half_data_flag=0):
    """Load a pickled test case, run the xscale6e minimizer and keep the fit.

    Parameters:
      datadir: directory holding ``<prefix>_miller.pickle`` and
        ``<prefix>_observation.pickle`` (prefix = work_params.output.prefix).
      work_params: parameter object; this method reads ``output.prefix``,
        ``scaling.mtz_file``, ``model_reindex_op`` and
        ``levmar.parameter_flags`` and forwards the object to the helpers.
      plot: when true, call ``plot_it`` (and the rotation histograms if
        "Rxy" is a refined parameter).
      esd_plot: when true, call ``minimizer.esd_plot()``.
      half_data_flag: forwarded to ``prepare_observations_for_scaling``.

    Results are stored on ``self`` (FSIM, ordered_intensities,
    reference_millers, Fit_I, Fit_I_stddev, I_visited, Fit, experiments,
    n_bins, d_min).
    """
    casetag = work_params.output.prefix

    # read the ground truth values back in
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle  # Python 3 has no cPickle module
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    # it is assumed (for now) that the reference millers contain a complete asymmetric unit
    # of indices, within the (d_max,d_min) region of interest and possibly outside the region.
    with open(os.path.join(datadir, casetag + "_miller.pickle"), "rb") as fh:
        reference_millers = pickle.load(fh)
    experiment_manager = read_experiments(work_params)
    with open(os.path.join(datadir, casetag + "_observation.pickle"), "rb") as fh:
        obs = pickle.load(fh)

    # Each print below passes one pre-formatted string, so the output is the
    # same whether print is a statement (Python 2) or a function (Python 3).
    print("Read in %d observations" % (len(obs["observed_intensity"])))
    reference_millers.show_summary(prefix="Miller index file ")
    print("%s %s %s %s" % (
        len(obs["frame_lookup"]), len(obs["observed_intensity"]),
        flex.max(obs['miller_lookup']), flex.max(obs['frame_lookup'])))

    from iotbx import mtz
    mtz_object = mtz.object(file_name=work_params.scaling.mtz_file)
    I_sim = mtz_object.as_miller_arrays()[0].as_intensity_array()
    I_sim.show_summary()
    MODEL_REINDEX_OP = work_params.model_reindex_op
    I_sim = I_sim.change_basis(MODEL_REINDEX_OP).map_to_asu()

    # match up isomorphous (the simulated fake F's) with experimental unique set
    matches = miller.match_multi_indices(
        miller_indices_unique=reference_millers.indices(),
        miller_indices=I_sim.indices())
    print("original unique %d" % len(reference_millers.indices()))
    print("isomorphous set %d" % len(I_sim.indices()))
    print("pairs %d" % len(matches.pairs()))
    # Reference indices without a partner in I_sim keep the default value
    # that flex.double(n) initialises them with.
    iso_data = flex.double(len(reference_millers.indices()))
    I_sim_data = I_sim.data()
    for pair in matches.pairs():
        iso_data[pair[0]] = I_sim_data[pair[1]]

    reference_data = miller.array(miller_set=reference_millers, data=iso_data)
    reference_data.set_observation_type_xray_intensity()

    FOBS = prepare_observations_for_scaling(
        work_params, obs=obs,
        reference_intensities=reference_data,
        files=experiment_manager.get_files(),
        half_data_flag=half_data_flag)

    I, I_visited, G, G_visited = I_and_G_base_estimate(FOBS, params=work_params)
    print("I length %d G length %d (Reference set; entire asymmetric unit)"
          % (len(I), len(G)))
    # presumably these assertions fail when half data are taken for CC1/2 or d_min is cut
    assert len(reference_data.data()) == len(I)
    model_I = reference_data.data()[0:len(I)]

    # T is only kept alive around the minimizer call and then deleted
    # explicitly, as in the original code.
    T = Timer("%d frames" % (len(G), ))
    mapper = mapper_factory(xscale6e)
    minimizer = mapper(I, G, I_visited, G_visited, FOBS, params=work_params,
                       experiments=experiment_manager.get_experiments())
    del T
    minimizer.show_summary()

    Fit = minimizer.e_unpack()
    Gstats = flex.mean_and_variance(Fit["G"].select(G_visited == 1))
    print("G mean and standard deviation: %s %s" % (
        Gstats.mean(), Gstats.unweighted_sample_standard_deviation()))
    if "Bfactor" in work_params.levmar.parameter_flags:
        Bstats = flex.mean_and_variance(Fit["B"].select(G_visited == 1))
        print("B mean and standard deviation: %s %s" % (
            Bstats.mean(), Bstats.unweighted_sample_standard_deviation()))
    show_correlation(Fit["I"], model_I, I_visited, "Correlation of I:")
    Fit_stddev = minimizer.e_unpack_stddev()

    # XXX FIXME known bug: the length of Fit["G"] could be smaller than the length of experiment_manager.get_files()
    # Not sure if this has any operational drawbacks. It's a result of half-dataset selection.

    if plot:
        plot_it(Fit["I"], model_I, mode="I")
        # NOTE(review): the histogram calls are assumed to belong to the
        # plot branch -- confirm against the original layout.
        if "Rxy" in work_params.levmar.parameter_flags:
            show_histogram(Fit["Ax"], "Histogram of x rotation (degrees)")
            show_histogram(Fit["Ay"], "Histogram of y rotation (degrees)")
    print("")
    if esd_plot:
        minimizer.esd_plot()

    from cctbx.examples.merging.show_results import show_overall_observations
    # The first returned item (a table) is not used here.
    _table1, self.n_bins, self.d_min = show_overall_observations(
        Fit["I"], Fit_stddev["I"], I_visited,
        reference_data, FOBS, title="Statistics for all reflections",
        work_params=work_params)

    self.FSIM = FOBS
    self.ordered_intensities = reference_data
    self.reference_millers = reference_millers
    self.Fit_I = Fit["I"]
    self.Fit_I_stddev = Fit_stddev["I"]
    self.I_visited = I_visited
    self.Fit = Fit
    self.experiments = experiment_manager
def scale_frame_detail(self,timestamp,cursor,do_inserts=True,result=None):#, file_name, db_mgr, out):
    """Build (and optionally execute) the INSERTs for one frame and its observations.

    Parameters:
      timestamp: stored as both ``unique_file_name`` and ``eventstamp``.
      cursor: database cursor; used for the trial/run id lookups and, when
        ``do_inserts`` is true, for ``execute`` / ``executemany``.
      do_inserts: when false nothing is written and ``frame_id`` is None.
      result: integration result mapping; defaults to ``self.params``.

    Returns:
      dict with
        'frame':        [sql, parameters, kwargs] for the frame row,
        'observations': [query, parameters, kwargs] when rows were inserted,
                        otherwise [None, None, kwargs].
    """
    if result is None:
        result = self.params

    # If the pickled integration file does not contain a wavelength,
    # fall back on the value given on the command line.  XXX The
    # wavelength parameter should probably be removed from master_phil
    # once all pickled integration files contain it.
    wavelength = result["wavelength"]
    assert (wavelength > 0)

    # Do not apply polarization correction here, as this requires knowledge of
    # pixel size at minimum, and full detector geometry in general.  The optimal
    # redesign would be to apply the polarization correction just after the integration
    # step in the integration code.
    # All prints pass a single value so they behave the same as a Python 2
    # print statement and as a Python 3 print() call.
    print("Step 3. Correct for polarization.")
    observations = result["observations"][0]
    observations_original_index = observations.deep_copy()
    assert len(observations_original_index.indices()) == len(observations.indices())

    # Now manipulate the data to conform to unit cell, asu, and space group
    # of reference.  The resolution will be cut later.
    # Only works if there is NOT an indexing ambiguity!
    observations = observations.customized_copy(anomalous_flag=False).map_to_asu()

    print("Step 4. Filter on global resolution and map to asu")
    from rstbx.dials_core.integration_core import show_observations
    show_observations(observations)

    print("Step 6. Match to reference intensities, filter by correlation, filter out negative intensities.")
    assert len(observations_original_index.indices()) \
        == len(observations.indices())

    # Ensure that match_multi_indices() will return identical results
    # when a frame's observations are matched against the
    # pre-generated Miller set, self.miller_set, and the reference
    # data set, self.i_model.  The implication is that the same match
    # can be used to map Miller indices to array indices for intensity
    # accumulation, and for determination of the correlation
    # coefficient in the presence of a scaling reference.
    self.miller_set.show_summary(prefix="mset ")

    matches = match_multi_indices(
        miller_indices_unique=self.miller_set.indices(),
        miller_indices=observations.indices())
    pairs = matches.pairs()
    n_pairs = len(pairs)

    slope = 1.0
    offset = 0.0

    print(result.get("sa_parameters")[0])
    observations_original_index_indices = observations_original_index.indices()
    print(list(result.keys()))

    kwargs = {'wavelength': wavelength,
              'beam_x': result['xbeam'],
              'beam_y': result['ybeam'],
              'distance': result['distance'],
              'slope': slope,
              'offset': offset,
              'unique_file_name': timestamp,
              'eventstamp': timestamp,
              'sifoil': 0.0}

    trial_id = self.get_trial_id(cursor)
    run_id = self.get_run_id(cursor)
    kwargs["trials_id"] = trial_id
    kwargs["rungroups_id"] = self.rungroup_id
    kwargs["runs_run_id"] = run_id
    kwargs["isoforms_isoform_id"] = self.isoform_id

    # Nine elements of the transposed orthogonalization matrix go into the
    # columns res_ori_1 .. res_ori_9.
    res_ori_direct = matrix.sqr(
        observations.unit_cell().orthogonalization_matrix()).transpose().elems
    for i in range(9):
        kwargs['res_ori_%d' % (i + 1)] = res_ori_direct[i]

    # Missing keys are recorded as NaN.
    kwargs['mosaic_block_rotation'] = result.get("ML_half_mosaicity_deg", [float("NaN")])[0]
    kwargs['mosaic_block_size'] = result.get("ML_domain_size_ang", [float("NaN")])[0]
    kwargs['ewald_proximal_volume'] = result.get("ewald_proximal_volume", [float("NaN")])[0]

    sql, parameters = self._insert(
        table='`%s_frames`' % self.db_experiment_tag,
        **kwargs)
    print(sql)
    print(parameters)
    results = {'frame': [sql, parameters, kwargs]}
    if do_inserts:
        cursor.execute(sql, parameters[0])
        frame_id = cursor.lastrowid
    else:
        frame_id = None

    xypred = result["mapped_predictions"][0]
    indices = flex.size_t([pair[1] for pair in pairs])

    sel_observations = flex.intersection(
        size=observations.data().size(),
        iselections=[indices])
    set_original_hkl = observations_original_index_indices.select(
        flex.intersection(
            size=observations_original_index_indices.size(),
            iselections=[indices]))
    set_xypred = xypred.select(
        flex.intersection(
            size=xypred.size(),
            iselections=[indices]))

    # NOTE(review): the number printed is len() of the selection object, as in
    # the original; confirm that this is the intended "observations added" count.
    print("Adding %d observations for this frame" % (len(sel_observations)))
    kwargs = {'hkls_id': self.miller_set_id.select(flex.size_t([pair[0] for pair in pairs])),
              'i': observations.data().select(sel_observations),
              'sigi': observations.sigmas().select(sel_observations),
              'detector_x_px': [xy[0] for xy in set_xypred],
              'detector_y_px': [xy[1] for xy in set_xypred],
              'frames_id': [frame_id] * n_pairs,
              'overload_flag': [0] * n_pairs,
              'original_h': [hkl[0] for hkl in set_original_hkl],
              'original_k': [hkl[1] for hkl in set_original_hkl],
              'original_l': [hkl[2] for hkl in set_original_hkl],
              'frames_rungroups_id': [self.rungroup_id] * n_pairs,
              'frames_trials_id': [trial_id] * n_pairs,
              'panel': [0] * n_pairs
              }
    if do_inserts:
        # For MySQLdb executemany() is six times slower than a single big
        # execute() unless the "values" keyword is given in lowercase
        # (http://sourceforge.net/p/mysql-python/bugs/305).
        #
        # See also merging_database_sqlite3._insert()
        # Column names and value columns are taken from one key list so
        # their order is guaranteed to agree.
        columns = list(kwargs.keys())
        column_values = [kwargs[name] for name in columns]
        query = ("INSERT INTO `%s_observations` (" % self.db_experiment_tag) \
                + ", ".join(columns) + ") values (" \
                + ", ".join(["%s"] * len(columns)) + ")"
        try:
            # list(): under Python 3 zip() is a one-shot iterator, but the
            # rows are both executed and returned to the caller.
            parameters = list(zip(*column_values))
        except TypeError:
            parameters = [column_values]
        cursor.executemany(query, parameters)
        results['observations'] = [query, parameters, kwargs]
    else:
        # since frame_id isn't valid in the query here, don't include a sql statement or parameters array in the results
        results['observations'] = [None, None, kwargs]
    return results
def determine_polar(self, observations_original, iparams, pickle_filename, pres=None):
    """
    Choose the indexing basis ("polarity") for one frame.

    Three sources are possible, selected by iparams.indexing_ambiguity:
      * flag_on == False or index_basis_in is None: always "h,k,l".
      * index_basis_in ends with "mtz": the observations are correlated with
        the first miller array of that file, once mapped to the asu and once
        after self.get_observations_non_polar(..., assigned_basis); the
        assigned basis wins only if its correlation exceeds the asu one by
        more than 1%.  If pres is given, intensities and sigmas are first
        divided by the scale (G, B) and the recomputed partiality.
      * otherwise index_basis_in is read as a pickle mapping
        pickle_filename -> basis string ("h,k,l" when the key is absent).

    Returns (polar_hkl, cc_asu, cc_rev); both correlations are 0 unless the
    mtz route was taken.
    """
    if iparams.indexing_ambiguity.flag_on == False:
        return "h,k,l", 0, 0

    index_basis_in = iparams.indexing_ambiguity.index_basis_in
    if index_basis_in is None:
        # set default polar_hkl to h,k,l
        return "h,k,l", 0, 0

    if not index_basis_in.endswith("mtz"):
        # use basis in the given input file
        # NOTE: pickle.load can execute arbitrary code; the basis file must
        # come from a trusted source.
        with open(index_basis_in, "rb") as basis_file:
            basis_pickle = pickle.load(basis_file)
        polar_hkl = "h,k,l"
        if pickle_filename in basis_pickle:
            polar_hkl = basis_pickle[pickle_filename]
        return polar_hkl, 0, 0

    # use reference mtz file to determine polarity
    from iotbx import reflection_file_reader

    reflection_file_polar = reflection_file_reader.any_reflection_file(index_basis_in)
    miller_array_polar = reflection_file_polar.as_miller_arrays()[0]
    miller_array_polar = miller_array_polar.resolution_filter(
        d_min=iparams.indexing_ambiguity.d_min, d_max=iparams.indexing_ambiguity.d_max
    )

    # for post-refinement, apply the scale factors and partiality first
    if pres is not None:
        two_theta = observations_original.two_theta(wavelength=pres.wavelength).data()
        from mod_leastsqr import calc_partiality_anisotropy_set

        n_obs = len(observations_original.indices())
        alpha_angle = flex.double([0] * n_obs)
        spot_pred_x_mm = flex.double([0] * n_obs)
        spot_pred_y_mm = flex.double([0] * n_obs)
        detector_distance_mm = pres.detector_distance_mm
        # Only the first returned item (partiality) is needed.
        partiality = calc_partiality_anisotropy_set(
            pres.unit_cell,
            0,
            0,
            observations_original.indices(),
            pres.ry,
            pres.rz,
            pres.r0,
            pres.re,
            two_theta,
            alpha_angle,
            pres.wavelength,
            pres.crystal_orientation,
            spot_pred_x_mm,
            spot_pred_y_mm,
            detector_distance_mm,
            iparams.partiality_model,
            iparams.flag_beam_divergence,
        )[0]
        sin_theta_over_lambda_sq = (
            observations_original.two_theta(pres.wavelength).sin_theta_over_lambda_sq().data()
        )
        # Same denominator for intensities and sigmas: G * exp(-2 B s^2) * p.
        scale = pres.G * flex.exp(flex.double(-2 * pres.B * sin_theta_over_lambda_sq)) * partiality
        I_full = flex.double(observations_original.data() / scale)
        sigI_full = flex.double(observations_original.sigmas() / scale)
        observations_original = observations_original.customized_copy(data=I_full, sigmas=sigI_full)

    def cc_with_reference(observations):
        # Correlation coefficient between the reference data and the
        # observations over the reflections they have in common.
        pairs = miller.match_multi_indices(
            miller_indices_unique=miller_array_polar.indices(), miller_indices=observations.indices()
        ).pairs()
        reference_data = miller_array_polar.data()
        observed_data = observations.data()
        I_ref_match = flex.double([reference_data[pair[0]] for pair in pairs])
        I_obs_match = flex.double([observed_data[pair[1]] for pair in pairs])
        return flex.linear_correlation(I_ref_match, I_obs_match).coefficient()

    observations_asu = observations_original.map_to_asu()
    observations_rev = self.get_observations_non_polar(
        observations_original, iparams.indexing_ambiguity.assigned_basis
    )
    cc_asu = cc_with_reference(observations_asu)
    cc_rev = cc_with_reference(observations_rev)

    polar_hkl = "h,k,l"
    if cc_rev > (cc_asu * 1.01):
        polar_hkl = iparams.indexing_ambiguity.assigned_basis
    return polar_hkl, cc_asu, cc_rev
def postrefine_by_frame(self, frame_no, pickle_filename, iph, miller_array_ref):
    """Post-refine one frame against a reference set.

    Parameters:
      frame_no: frame number; used in the log lines and stored in the result.
      pickle_filename: integration pickle to load; it must provide
        "current_orientation" and "wavelength".
      iph: input parameter object, forwarded to the helpers.
      miller_array_ref: reference miller array the frame is matched against.

    Returns:
      A ``postref_results`` object, or None when ``organize_input`` yields
      no observations or the optimizer reports ``SE_I is None``.
    """
    # 1. Prepare data
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    with open(pickle_filename, "rb") as pickle_file:
        observations_pickle = pickle.load(pickle_file)
    crystal_init_orientation = observations_pickle["current_orientation"][0]
    wavelength = observations_pickle["wavelength"]

    # grab img. name (not used further in this method; the lookup is kept
    # as it was in case the helper is relied on elsewhere)
    imgname = pickle_filename
    if iph.file_name_in_img != '':
        fh = file_handler()
        imgname = fh.get_imgname_from_pickle_filename(iph.file_name_in_img, pickle_filename)

    observations_original, alpha_angle_obs = self.organize_input(observations_pickle, iph)
    if observations_original is None:
        # Single pre-formatted string: same output under Python 2 and 3.
        print("%s -fail obs is none" % (frame_no,))
        return None

    # 2. Determine polarity - always do this even if flag_polar = False
    # the function will take care of it.
    polar_hkl, cc_iso_raw_asu, cc_iso_raw_rev = self.determine_polar(
        observations_original, iph, pickle_filename)

    # 3. Select data for post-refinement (only select indices that are
    # common with the reference set)
    observations_non_polar = self.get_observations_non_polar(observations_original, polar_hkl)
    matches = miller.match_multi_indices(
        miller_indices_unique=miller_array_ref.indices(),
        miller_indices=observations_non_polar.indices())
    pairs = matches.pairs()

    # pair[0] indexes the reference, pair[1] the observations.
    ref_data = miller_array_ref.data()
    obs_data = observations_non_polar.data()
    obs_sigmas = observations_non_polar.sigmas()
    obs_original_indices = observations_original.indices()
    I_ref_match = flex.double([ref_data[pair[0]] for pair in pairs])
    I_obs_match = flex.double([obs_data[pair[1]] for pair in pairs])
    sigI_obs_match = flex.double([obs_sigmas[pair[1]] for pair in pairs])
    miller_indices_original_obs_match = flex.miller_index(
        [obs_original_indices[pair[1]] for pair in pairs])
    alpha_angle_set = flex.double([alpha_angle_obs[pair[1]] for pair in pairs])
    observations_original_sel = observations_original.customized_copy(
        data=I_obs_match,
        sigmas=sigI_obs_match,
        indices=miller_indices_original_obs_match)

    # 4. Do least-squares refinement
    lsqrh = leastsqr_handler()
    refined_params, se_params, stats, partiality_sel, SE_I, var_I_p, var_k, var_p = lsqrh.optimize(
        I_ref_match,
        observations_original_sel, wavelength, crystal_init_orientation,
        alpha_angle_set, iph)

    if SE_I is None:
        print("frame %s  - failed" % (frame_no,))
        return None

    pres = postref_results()
    observations_non_polar_sel = self.get_observations_non_polar(observations_original_sel, polar_hkl)
    pres.set_params(
        observations=observations_non_polar_sel,
        refined_params=refined_params,
        se_params=se_params,
        stats=stats,
        partiality=partiality_sel,
        frame_no=frame_no,
        pickle_filename=pickle_filename,
        wavelength=wavelength,
        SE_I=SE_I,
        var_I_p=var_I_p,
        var_k=var_k,
        var_p=var_p)
    # The three pieces are joined with single spaces, as a Python 2 print
    # statement with comma-separated arguments would do.
    print(" ".join([
        'frame %6.0f' % pres.frame_no,
        ' SE=%7.2f R-sq=%7.2f CC=%7.2f' % pres.stats,
        str(polar_hkl)]))
    return pres
def get_observations_non_polar(self, observations_original, observations_filename):
    """
    Return the observations mapped to the asu in the selected indexing basis.

    The basis is chosen from self.iparams.indexing_ambiguity:
      * flag_on == False, or no index_basis_in given: keep "h,k,l".
      * index_basis_in ends with 'mtz': correlate the observations with the
        reference array from that file in both bases (asu as-is and after
        applying assigned_basis) and keep the better-correlated one.
      * otherwise index_basis_in is read as a pickle mapping
        observations_filename -> basis string ("h,k,l" if the key is absent).

    Parameters:
      observations_original: miller array of the frame's observations.
      observations_filename: key used to look the frame up in the basis
        pickle.

    Returns:
      observations_original.map_to_asu(), re-based with change_basis(...)
      and mapped to the asu again when the chosen basis is not "h,k,l".
    """
    observations_asu = observations_original.map_to_asu()
    ambiguity = self.iparams.indexing_ambiguity
    if ambiguity.flag_on == False:
        return observations_asu
    if ambiguity.index_basis_in is None:
        # Nothing to decide the basis with; same default that
        # determine_polar uses in this situation.
        return observations_asu

    polar_hkl = 'h,k,l'
    if ambiguity.index_basis_in.endswith('mtz'):
        # use reference mtz file to determine polarity
        from mod_util import utility_handler
        from cctbx import sgtbx
        util_hdlr = utility_handler()
        # NOTE(review): flag_mtz_found is not checked, as in the original.
        flag_mtz_found, miller_array_polar = util_hdlr.get_miller_array_from_mtz(
            ambiguity.index_basis_in)
        if self.iparams.target_anomalous_flag:
            miller_array_polar = miller_array_polar.generate_bijvoet_mates()
        miller_array_polar = miller_array_polar.resolution_filter(
            d_min=ambiguity.d_min, d_max=ambiguity.d_max)

        # Build the alternative-basis observations directly.  The previous
        # code called this method again to get them, which re-entered this
        # very branch and never terminated.
        observations_rev = observations_asu.change_basis(
            sgtbx.change_of_basis_op(ambiguity.assigned_basis)).map_to_asu()

        def cc_with_reference(observations):
            # Correlation between reference and observed intensities over
            # the reflections present in both.
            pairs = miller.match_multi_indices(
                miller_indices_unique=miller_array_polar.indices(),
                miller_indices=observations.indices()).pairs()
            reference_data = miller_array_polar.data()
            observed_data = observations.data()
            I_ref_match = flex.double([reference_data[pair[0]] for pair in pairs])
            I_obs_match = flex.double([observed_data[pair[1]] for pair in pairs])
            return np.corrcoef(I_ref_match, I_obs_match)[0, 1]

        cc_asu = cc_with_reference(observations_asu)
        cc_rev = cc_with_reference(observations_rev)
        if cc_rev > cc_asu:
            polar_hkl = ambiguity.assigned_basis
    else:
        # use basis in the given input file
        # NOTE: pickle.load can execute arbitrary code; the basis file must
        # come from a trusted source.
        with open(ambiguity.index_basis_in, "rb") as basis_file:
            basis_pickle = pickle.load(basis_file)
        if observations_filename in basis_pickle:
            polar_hkl = basis_pickle[observations_filename]

    # return observations with correct polarity
    if polar_hkl == 'h,k,l':
        return observations_asu
    from cctbx import sgtbx
    cb_op = sgtbx.change_of_basis_op(polar_hkl)
    return observations_asu.change_basis(cb_op).map_to_asu()
def __init__(self,
             datadir,
             work_params,
             plot=False,
             esd_plot=False,
             half_data_flag=0):
    """Load a pickled test case, run the xscale6e minimizer and keep the fit.

    Parameters:
      datadir: directory holding ``<prefix>_miller.pickle`` and
        ``<prefix>_observation.pickle`` (prefix = work_params.output.prefix).
      work_params: parameter object; this method reads ``output.prefix``,
        ``scaling.mtz_file``, ``model_reindex_op`` and
        ``levmar.parameter_flags`` and forwards the object to the helpers.
      plot: when true, call ``plot_it`` (and the rotation histograms if
        "Rxy" is a refined parameter).
      esd_plot: when true, call ``minimizer.esd_plot()``.
      half_data_flag: forwarded to ``prepare_observations_for_scaling``.

    Results are stored on ``self`` (FSIM, ordered_intensities,
    reference_millers, Fit_I, Fit_I_stddev, I_visited, Fit, experiments,
    n_bins, d_min).
    """
    casetag = work_params.output.prefix

    # read the ground truth values back in
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle  # Python 3 has no cPickle module
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    # it is assumed (for now) that the reference millers contain a complete asymmetric unit
    # of indices, within the (d_max,d_min) region of interest and possibly outside the region.
    miller_path = os.path.join(datadir, casetag + "_miller.pickle")
    with open(miller_path, "rb") as miller_file:
        reference_millers = pickle.load(miller_file)
    experiment_manager = read_experiments(work_params)
    observation_path = os.path.join(datadir, casetag + "_observation.pickle")
    with open(observation_path, "rb") as observation_file:
        obs = pickle.load(observation_file)

    # Every print gets one pre-formatted string, which prints identically
    # with the Python 2 print statement and the Python 3 print function.
    print("Read in %d observations" % (len(obs["observed_intensity"])))
    reference_millers.show_summary(prefix="Miller index file ")
    print("%s %s %s %s" % (
        len(obs["frame_lookup"]),
        len(obs["observed_intensity"]),
        flex.max(obs['miller_lookup']),
        flex.max(obs['frame_lookup'])))

    from iotbx import mtz
    mtz_object = mtz.object(file_name=work_params.scaling.mtz_file)
    I_sim = mtz_object.as_miller_arrays()[0].as_intensity_array()
    I_sim.show_summary()
    MODEL_REINDEX_OP = work_params.model_reindex_op
    I_sim = I_sim.change_basis(MODEL_REINDEX_OP).map_to_asu()

    # match up isomorphous (the simulated fake F's) with experimental unique set
    matches = miller.match_multi_indices(
        miller_indices_unique=reference_millers.indices(),
        miller_indices=I_sim.indices())
    print("original unique %d" % len(reference_millers.indices()))
    print("isomorphous set %d" % len(I_sim.indices()))
    print("pairs %d" % len(matches.pairs()))
    # Reference indices without a partner in I_sim keep the default value
    # that flex.double(n) initialises them with.
    iso_data = flex.double(len(reference_millers.indices()))
    simulated = I_sim.data()
    for pair in matches.pairs():
        iso_data[pair[0]] = simulated[pair[1]]

    reference_data = miller.array(miller_set=reference_millers,
                                  data=iso_data)
    reference_data.set_observation_type_xray_intensity()

    FOBS = prepare_observations_for_scaling(
        work_params,
        obs=obs,
        reference_intensities=reference_data,
        files=experiment_manager.get_files(),
        half_data_flag=half_data_flag)

    I, I_visited, G, G_visited = I_and_G_base_estimate(FOBS,
                                                       params=work_params)
    print("I length %d G length %d (Reference set; entire asymmetric unit)"
          % (len(I), len(G)))
    # presumably these assertions fail when half data are taken for CC1/2 or d_min is cut
    assert len(reference_data.data()) == len(I)
    model_I = reference_data.data()[0:len(I)]

    # T is only kept alive around the minimizer call and then deleted
    # explicitly, as in the original code.
    T = Timer("%d frames" % (len(G), ))
    mapper = mapper_factory(xscale6e)
    minimizer = mapper(I,
                       G,
                       I_visited,
                       G_visited,
                       FOBS,
                       params=work_params,
                       experiments=experiment_manager.get_experiments())
    del T
    minimizer.show_summary()

    Fit = minimizer.e_unpack()
    Gstats = flex.mean_and_variance(Fit["G"].select(G_visited == 1))
    print("G mean and standard deviation: %s %s" % (
        Gstats.mean(), Gstats.unweighted_sample_standard_deviation()))
    if "Bfactor" in work_params.levmar.parameter_flags:
        Bstats = flex.mean_and_variance(Fit["B"].select(G_visited == 1))
        print("B mean and standard deviation: %s %s" % (
            Bstats.mean(), Bstats.unweighted_sample_standard_deviation()))
    show_correlation(Fit["I"], model_I, I_visited, "Correlation of I:")
    Fit_stddev = minimizer.e_unpack_stddev()

    # XXX FIXME known bug: the length of Fit["G"] could be smaller than the length of experiment_manager.get_files()
    # Not sure if this has any operational drawbacks. It's a result of half-dataset selection.

    if plot:
        plot_it(Fit["I"], model_I, mode="I")
        # NOTE(review): the histogram calls are assumed to belong to the
        # plot branch -- confirm against the original layout.
        if "Rxy" in work_params.levmar.parameter_flags:
            show_histogram(Fit["Ax"], "Histogram of x rotation (degrees)")
            show_histogram(Fit["Ay"], "Histogram of y rotation (degrees)")
    print("")
    if esd_plot:
        minimizer.esd_plot()

    from cctbx.examples.merging.show_results import show_overall_observations
    # The first returned item (a table) is not used here.
    _table1, self.n_bins, self.d_min = show_overall_observations(
        Fit["I"],
        Fit_stddev["I"],
        I_visited,
        reference_data,
        FOBS,
        title="Statistics for all reflections",
        work_params=work_params)

    self.FSIM = FOBS
    self.ordered_intensities = reference_data
    self.reference_millers = reference_millers
    self.Fit_I = Fit["I"]
    self.Fit_I_stddev = Fit_stddev["I"]
    self.I_visited = I_visited
    self.Fit = Fit
    self.experiments = experiment_manager
def run(self, experiments, reflections):
    """Scale every experiment against the reference model, one at a time.

    Experiments whose scaling result carries ``err_low_signal`` or
    ``err_low_correlation`` are dropped together with their reflections.
    Unless postrefinement is enabled, the intensities and variances of the
    accepted experiments are rescaled in place by the fitted slope.
    Per-rank counts are logged, then reduced over MPI and summarised by
    rank 0.

    Returns:
      (new_experiments, new_reflections): the accepted experiments and
      their reflections.
    """
    self.logger.log_step_time("SCALE_FRAMES")

    new_experiments = ExperimentList()
    new_reflections = flex.reflection_table()

    # Per-rank bookkeeping
    results = []
    slopes = []
    correlations = []
    n_high_res = 0
    n_low_signal = 0
    n_low_correlation = 0

    target_symm = symmetry(unit_cell = self.params.scaling.unit_cell,
                           space_group_info = self.params.scaling.space_group)

    for experiment in experiments:
        exp_reflections = reflections.select(reflections['exp_id'] == experiment.identifier)

        # Wrap this experiment's reflections in a miller array
        # (sigmas are the square roots of the variances).
        exp_miller_indices = miller.set(target_symm, exp_reflections['miller_index_asymmetric'], True)
        exp_sigmas = flex.double(flex.sqrt(exp_reflections['intensity.sum.variance']))
        exp_intensities = miller.array(exp_miller_indices, exp_reflections['intensity.sum.value'], exp_sigmas)

        model_intensities = self.params.scaling.i_model

        # Pair up model HKLs with the experiment HKLs
        matching_indices = miller.match_multi_indices(
            miller_indices_unique = model_intensities.indices(),
            miller_indices = exp_intensities.indices())

        # Least squares.
        # RB: in cxi-merge we fit reference to experiment, but we should really do it the other way
        if self.params.scaling.mark0.fit_reference_to_experiment:
            fit = self.fit_reference_to_experiment
        else:
            fit = self.fit_experiment_to_reference
        result = fit(model_intensities, exp_intensities, matching_indices)

        # Rejections: count and move on to the next experiment
        if result.error == scaling_result.err_low_signal:
            n_low_signal += 1
            continue
        if result.error == scaling_result.err_low_correlation:
            n_low_correlation += 1
            continue

        slopes.append(result.slope)
        correlations.append(result.correlation)

        if self.params.output.log_level == 0:
            self.logger.log("Experiment ID: %s; Slope: %f; Correlation %f"%(experiment.identifier, result.slope, result.correlation))

        # count high resolution experiments
        if exp_intensities.d_min() <= self.params.merging.d_min:
            n_high_res += 1

        # apply scale factors (skipped when postrefinement is enabled)
        if not self.params.postrefinement.enable:
            slope_squared = (result.slope**2)
            if self.params.scaling.mark0.fit_reference_to_experiment:
                exp_reflections['intensity.sum.value'] /= result.slope
                exp_reflections['intensity.sum.variance'] /= slope_squared
            else:
                exp_reflections['intensity.sum.value'] *= result.slope
                exp_reflections['intensity.sum.variance'] *= slope_squared

        new_experiments.append(experiment)
        new_reflections.extend(exp_reflections)

    # Every experiment that is missing from the output must have been counted
    # in one of the two rejection categories.
    rejected_experiments = len(experiments) - len(new_experiments)
    assert rejected_experiments == n_low_signal + n_low_correlation

    n_reflections_removed = reflections.size() - new_reflections.size()

    self.logger.log("Experiments rejected because of low signal: %d"%n_low_signal)
    self.logger.log("Experiments rejected because of low correlation with reference: %d"%n_low_correlation)
    self.logger.log("Reflections rejected because of rejected experiments: %d"%n_reflections_removed)
    self.logger.log("High resolution experiments: %d"%n_high_res)
    if self.params.postrefinement.enable:
        self.logger.log("Note: scale factors were not applied, because postrefinement is enabled")

    # MPI-reduce all counts (root is rank 0)
    comm = self.mpi_helper.comm
    MPI = self.mpi_helper.MPI
    total_low_signal = comm.reduce(n_low_signal, MPI.SUM, 0)
    total_low_correlation = comm.reduce(n_low_correlation, MPI.SUM, 0)
    total_reflections_removed = comm.reduce(n_reflections_removed, MPI.SUM, 0)
    total_high_res = comm.reduce(n_high_res, MPI.SUM, 0)
    all_slopes = comm.reduce(slopes, MPI.SUM, 0)
    all_correlations = comm.reduce(correlations, MPI.SUM, 0)

    # rank 0: log data statistics
    if self.mpi_helper.rank == 0:
        self.logger.main_log('Experiments rejected because of low signal: %d'%total_low_signal)
        self.logger.main_log('Experiments rejected because of low correlation with reference: %d'%total_low_correlation)
        self.logger.main_log('Reflections rejected because of rejected experiments: %d'%total_reflections_removed)
        self.logger.main_log('Experiments with high resolution of %5.2f Angstrom or better: %d'%(self.params.merging.d_min, total_high_res))

        stats_slope = flex.mean_and_variance(flex.double(all_slopes))
        stats_correlation = flex.mean_and_variance(flex.double(all_correlations))
        self.logger.main_log('Average experiment scale factor wrt reference: %f; correlation: %f +/- %f'%(
            stats_slope.mean(),
            stats_correlation.mean(),
            stats_correlation.unweighted_sample_standard_deviation()))

        if self.params.postrefinement.enable:
            self.logger.main_log("Note: scale factors were not applied, because postrefinement is enabled")

    self.logger.log_step_time("SCALE_FRAMES", True)

    return new_experiments, new_reflections
def output_mtz_files(self, results, iph, output_mtz_file_prefix, avg_mode):
    """Merge the accepted frames into one intensity set and report statistics.

    Frames are accepted when their correlation (``pres.stats[2]``) reaches the
    threshold and all six unit-cell parameters lie within tolerance of
    ``iph.target_unit_cell``.  Observations of accepted frames are pooled,
    sorted by Miller index, averaged per index with
    ``self.calc_average_I_sigI``, filtered for outliers per resolution bin and
    finally summarised in a text table and a CSV table.

    Parameters:
      results: iterable of per-frame result objects; ``None`` entries are
        skipped.  Each result is read for ``pickle_filename``, ``stats``,
        ``uc_params``, ``observations``, ``wavelength``, ``partiality``,
        ``SE_I``, ``G``, ``B`` and ``frame_no``.
      iph: parameter holder.  Attributes read here: ``frame_accept_min_cc``,
        ``file_name_in_img``, ``target_unit_cell``, ``uc_len_tol``,
        ``uc_angle_tol``, ``target_space_group``, ``target_anomalous_flag``,
        ``n_bins``, ``d_min``, ``d_max``, ``file_name_iso_mtz``,
        ``miller_array_iso`` and ``flag_plot``.
      output_mtz_file_prefix: when not empty, the merged array is written to
        ``output_mtz_file_prefix + "_merge.mtz"``.
      avg_mode: ``"average"`` sets the correlation threshold to 0; the value
        is also forwarded to ``self.calc_average_I_sigI``.

    Returns:
      ``(miller_array_merge, txt_out, csv_out)``: the outlier-filtered merged
      array, the text report (also printed) and the CSV report.
    """
    partiality_filter = 0.1
    sigma_filter = 8
    if avg_mode == "average":
        cc_thres = 0
    else:
        cc_thres = iph.frame_accept_min_cc

    # prepare data for merging
    miller_indices_all = flex.miller_index()
    I_all = flex.double()
    sigI_all = flex.double()
    G_all = flex.double()
    B_all = flex.double()
    p_all = flex.double()
    SE_I_all = flex.double()
    SE_all = flex.double()
    sin_sq_all = flex.double()
    cn_good_frame = 0
    cn_bad_frame_uc = 0
    cn_bad_frame_cc = 0
    # Tolerance per unit-cell parameter: three lengths, then three angles.
    uc_tolerances = (iph.uc_len_tol,) * 3 + (iph.uc_angle_tol,) * 3
    for pres in results:
        if pres is None:
            continue
        fh = file_handler()
        img_filename = fh.get_imgname_from_pickle_filename(iph.file_name_in_img, pres.pickle_filename)
        # check cc
        if pres.stats[2] >= cc_thres:
            # check unit-cell
            uc_within_limits = all(
                abs(pres.uc_params[k] - iph.target_unit_cell[k]) <= uc_tolerances[k] for k in range(6)
            )
            if uc_within_limits:
                cn_good_frame += 1
                sin_theta_over_lambda_sq = (
                    pres.observations.two_theta(wavelength=pres.wavelength).sin_theta_over_lambda_sq().data()
                )
                for miller_index, i_obs, sigi_obs, p, se_i, sin_sq in zip(
                    pres.observations.indices(),
                    pres.observations.data(),
                    pres.observations.sigmas(),
                    pres.partiality,
                    pres.SE_I,
                    sin_theta_over_lambda_sq,
                ):
                    miller_indices_all.append(miller_index)
                    I_all.append(i_obs)
                    sigI_all.append(sigi_obs)
                    G_all.append(pres.G)
                    B_all.append(pres.B)
                    p_all.append(p)
                    SE_I_all.append(se_i)
                    sin_sq_all.append(sin_sq)
                    SE_all.append(pres.stats[0])
                # Single-argument print(): same text under the Python 2 print
                # statement and the Python 3 print function.
                print("%s %s %s" % (pres.frame_no, img_filename, " merged"))
            else:
                print(
                    "%s %s %s"
                    % (
                        pres.frame_no,
                        img_filename,
                        " discarded - unit-cell exceeds the limits (%6.2f %6.2f %6.2f %5.2f %5.2f %5.2f)"
                        % (
                            pres.uc_params[0],
                            pres.uc_params[1],
                            pres.uc_params[2],
                            pres.uc_params[3],
                            pres.uc_params[4],
                            pres.uc_params[5],
                        ),
                    )
                )
                cn_bad_frame_uc += 1
        else:
            print(
                "%s %s %s"
                % (pres.frame_no, img_filename, " discarded - C.C. too low (C.C.=%5.2f%%)" % (pres.stats[2] * 100))
            )
            cn_bad_frame_cc += 1

    # plot stats
    self.plot_stats(results, iph, iph.uc_len_tol, iph.uc_angle_tol)

    # calculate average unit cell
    uc_mean = self.calc_mean_unit_cell(results, iph, iph.uc_len_tol, iph.uc_angle_tol)
    # NOTE(review): unit_cell_mean is not read again in this function.
    unit_cell_mean = unit_cell((uc_mean[0], uc_mean[1], uc_mean[2], uc_mean[3], uc_mean[4], uc_mean[5]))

    # from all observations merge them
    crystal_symmetry = crystal.symmetry(
        unit_cell=(uc_mean[0], uc_mean[1], uc_mean[2], uc_mean[3], uc_mean[4], uc_mean[5]),
        space_group_symbol=iph.target_space_group,
    )
    miller_set_all = miller.set(
        crystal_symmetry=crystal_symmetry, indices=miller_indices_all, anomalous_flag=iph.target_anomalous_flag
    )
    miller_array_all = miller_set_all.array(data=I_all, sigmas=sigI_all).set_observation_type_xray_intensity()

    # sort reflections according to asymmetric-unit symmetry hkl
    perm = miller_array_all.sort_permutation(by_value="packed_indices")
    miller_indices_all_sort = miller_array_all.indices().select(perm)
    I_obs_all_sort = miller_array_all.data().select(perm)
    sigI_obs_all_sort = miller_array_all.sigmas().select(perm)
    d_spacings_sort = miller_array_all.d_spacings().data().select(perm)
    G_all_sort = G_all.select(perm)
    B_all_sort = B_all.select(perm)
    p_all_sort = p_all.select(perm)
    SE_I_all_sort = SE_I_all.select(perm)
    sin_sq_all_sort = sin_sq_all.select(perm)
    SE_all_sort = SE_all.select(perm)

    miller_indices_merge = flex.miller_index()
    I_merge = flex.double()
    sigI_merge = flex.double()
    stat_all = []
    I_even = flex.double()
    I_odd = flex.double()

    # Walk the sorted observations one run of identical Miller indices at a
    # time.  The outer loop runs to the very end of the array so that a final
    # observation that is alone in its group is merged as well (the previous
    # bound of len - 1 silently dropped it).
    n_sorted = len(I_obs_all_sort)
    refl_now = 0
    while refl_now < n_sorted:
        miller_index_group = miller_indices_all_sort[refl_now]
        I_obs_group = flex.double()
        sigI_obs_group = flex.double()
        d_spacings_group = flex.double()
        G_group = flex.double()
        B_group = flex.double()
        p_group = flex.double()
        SE_I_group = flex.double()
        sin_sq_group = flex.double()
        SE_group = flex.double()
        # The first pass always matches (the index is compared with itself),
        # so refl_now advances by at least one per group.
        while refl_now < n_sorted:
            miller_index_now = miller_indices_all_sort[refl_now]
            if (
                miller_index_now[0] != miller_index_group[0]
                or miller_index_now[1] != miller_index_group[1]
                or miller_index_now[2] != miller_index_group[2]
            ):
                break
            # select only reflections with higher partiality
            if p_all_sort[refl_now] >= partiality_filter:
                I_obs_group.append(I_obs_all_sort[refl_now])
                sigI_obs_group.append(sigI_obs_all_sort[refl_now])
                d_spacings_group.append(d_spacings_sort[refl_now])
                G_group.append(G_all_sort[refl_now])
                B_group.append(B_all_sort[refl_now])
                p_group.append(p_all_sort[refl_now])
                SE_I_group.append(SE_I_all_sort[refl_now])
                sin_sq_group.append(sin_sq_all_sort[refl_now])
                SE_group.append(SE_all_sort[refl_now])
            refl_now += 1

        if len(I_obs_group) > 0:
            I_avg, sigI_avg, stat, I_avg_even, I_avg_odd = self.calc_average_I_sigI(
                I_obs_group,
                sigI_obs_group,
                G_group,
                B_group,
                p_group,
                SE_I_group,
                sin_sq_group,
                avg_mode,
                SE_group,
                iph,
                d_spacings_group,
            )
            if math.isnan(stat[0]) or math.isinf(stat[0]) or math.isnan(stat[1]) or math.isinf(stat[1]):
                print("%s %s" % (miller_index_group, " not merged (Qw=%.4g/%.4g)" % (stat[0], stat[1])))
            else:
                miller_indices_merge.append(miller_index_group)
                I_merge.append(I_avg)
                sigI_merge.append(sigI_avg)
                stat_all.append(stat)
                I_even.append(I_avg_even)
                I_odd.append(I_avg_odd)

    # output mtz file and report binning stat
    miller_set_merge = miller.set(
        crystal_symmetry=crystal_symmetry, indices=miller_indices_merge, anomalous_flag=iph.target_anomalous_flag
    )
    miller_array_merge = miller_set_merge.array(
        data=I_merge, sigmas=sigI_merge
    ).set_observation_type_xray_intensity()

    # remove outliers
    binner_merge = miller_array_merge.setup_binner(n_bins=iph.n_bins)
    binner_merge_indices = binner_merge.bin_indices()
    miller_indices_merge_filter = flex.miller_index()
    I_merge_filter = flex.double()
    sigI_merge_filter = flex.double()
    I_even_filter = flex.double()
    I_odd_filter = flex.double()
    stat_filter = []
    i_seq = flex.int(list(range(len(binner_merge_indices))))
    for i in range(1, iph.n_bins + 1):
        i_binner = binner_merge_indices == i
        I_obs_bin = miller_array_merge.data().select(i_binner)
        if len(I_obs_bin) == 0:
            continue
        sigI_obs_bin = miller_array_merge.sigmas().select(i_binner)
        miller_indices_bin = miller_array_merge.indices().select(i_binner)
        stat_bin = [stat_all[j] for j in i_seq.select(i_binner)]
        I_even_bin = I_even.select(i_binner)
        I_odd_bin = I_odd.select(i_binner)
        # Keep reflections whose relative deviation from the bin median is
        # below sigma_filter.
        median_I_bin = np.median(I_obs_bin)
        i_filter = flex.abs((I_obs_bin - median_I_bin) / median_I_bin) < sigma_filter
        i_seq_bin = flex.int(list(range(len(i_filter))))
        # All arrays below are selected with the same mask, so they stay
        # aligned element by element.
        I_merge_filter.extend(I_obs_bin.select(i_filter))
        sigI_merge_filter.extend(sigI_obs_bin.select(i_filter))
        miller_indices_merge_filter.extend(miller_indices_bin.select(i_filter))
        stat_filter.extend([stat_bin[j] for j in i_seq_bin.select(i_filter)])
        I_even_filter.extend(I_even_bin.select(i_filter))
        I_odd_filter.extend(I_odd_bin.select(i_filter))

    miller_set_merge = miller.set(
        crystal_symmetry=crystal_symmetry,
        indices=miller_indices_merge_filter,
        anomalous_flag=iph.target_anomalous_flag,
    )
    miller_array_merge = miller_set_merge.array(
        data=I_merge_filter, sigmas=sigI_merge_filter
    ).set_observation_type_xray_intensity()
    if output_mtz_file_prefix != "":
        mtz_dataset_merge = miller_array_merge.as_mtz_dataset(column_root_label="IOBS")
        mtz_dataset_merge.mtz_object().write(file_name=output_mtz_file_prefix + "_merge.mtz")

    # report binning stats
    miller_array_template_asu = miller_array_merge.complete_set().resolution_filter(
        d_min=iph.d_min, d_max=iph.d_max
    )
    binner_template_asu = miller_array_template_asu.setup_binner(n_bins=iph.n_bins)
    binner_template_asu_indices = binner_template_asu.bin_indices()
    rule = "--------------------------------------------------------------------------------------------------------\n"
    csv_out = ""
    csv_out += "Bin, Low, High, Completeness, <N_obs>, Qmeas, Qw, CC1/2, N_ind, CCiso, N_ind, <I/sigI>\n"
    txt_out = "\n"
    txt_out += "Summary for " + output_mtz_file_prefix + "_merge.mtz\n"
    txt_out += "Bin Resolution Range Completeness <N_obs> |Qmeas Qw CC1/2 N_ind |CCiso N_ind| <I/sigI>\n"
    txt_out += rule
    sum_r_meas_w_top = 0
    sum_r_meas_w_btm = 0
    sum_r_meas_top = 0
    sum_r_meas_btm = 0
    merge_indices = miller_array_merge.indices()
    merge_data = miller_array_merge.data()
    merge_sigmas = miller_array_merge.sigmas()
    for i in range(1, iph.n_bins + 1):
        i_binner = binner_template_asu_indices == i
        miller_indices_bin = miller_array_template_asu.indices().select(i_binner)
        matches_template = miller.match_multi_indices(
            miller_indices_unique=miller_indices_bin, miller_indices=merge_indices
        )
        # Positions in the merged array of the reflections falling in this bin.
        merged_rows = [pair[1] for pair in matches_template.pairs()]
        I_bin = flex.double([merge_data[j] for j in merged_rows])
        sigI_bin = flex.double([merge_sigmas[j] for j in merged_rows])
        miller_indices_obs_bin = flex.miller_index([merge_indices[j] for j in merged_rows])
        if len(I_bin) == 0:
            mean_i_over_sigi_bin = 0
            multiplicity_bin = 0
            r_meas_w_bin = 0
            r_meas_bin = 0
            # These two are printed for every bin, so an empty bin must define
            # them too (previously this branch assigned the unrelated ``cc12``).
            cc12_bin = 0
            n_refl_cc12_bin = 0
        else:
            mean_i_over_sigi_bin = flex.mean(I_bin / sigI_bin)
            stat_bin = [stat_filter[j] for j in merged_rows]
            sum_r_meas_w_top_bin = 0
            sum_r_meas_w_btm_bin = 0
            sum_r_meas_top_bin = 0
            sum_r_meas_btm_bin = 0
            sum_mul_bin = 0
            for stat in stat_bin:
                r_meas_w_top, r_meas_w_btm, r_meas_top, r_meas_btm, mul = stat
                sum_r_meas_w_top_bin += r_meas_w_top
                sum_r_meas_w_btm_bin += r_meas_w_btm
                sum_r_meas_top_bin += r_meas_top
                sum_r_meas_btm_bin += r_meas_btm
                sum_mul_bin += mul
                sum_r_meas_w_top += r_meas_w_top
                sum_r_meas_w_btm += r_meas_w_btm
                sum_r_meas_top += r_meas_top
                sum_r_meas_btm += r_meas_btm
            multiplicity_bin = sum_mul_bin / len(I_bin)
            if sum_r_meas_w_btm_bin > 0:
                r_meas_w_bin = sum_r_meas_w_top_bin / sum_r_meas_w_btm_bin
            else:
                r_meas_w_bin = float("Inf")
            if sum_r_meas_btm_bin > 0:
                r_meas_bin = sum_r_meas_top_bin / sum_r_meas_btm_bin
            else:
                r_meas_bin = float("Inf")
            I_even_filter_bin = flex.double([I_even_filter[j] for j in merged_rows])
            I_odd_filter_bin = flex.double([I_odd_filter[j] for j in merged_rows])
            # for cc1/2, use only non-zero I (zero when there is only one observation)
            i_even_filter_sel = I_even_filter_bin > 0
            n_refl_cc12_bin = len(I_even_filter_bin.select(i_even_filter_sel))
            cc12_bin = 0
            if n_refl_cc12_bin > 0:
                cc12_bin = np.corrcoef(
                    I_even_filter_bin.select(i_even_filter_sel), I_odd_filter_bin.select(i_even_filter_sel)
                )[0, 1]
        n_obs_bin = len(miller_indices_obs_bin)
        n_template_bin = len(miller_indices_bin)
        completeness = n_obs_bin / n_template_bin

        # calculate CCiso
        cc_iso_bin = 0
        n_refl_cciso_bin = 0
        if iph.file_name_iso_mtz != "":
            matches_iso = miller.match_multi_indices(
                miller_indices_unique=iph.miller_array_iso.indices(), miller_indices=miller_indices_obs_bin
            )
            iso_pairs = matches_iso.pairs()
            I_iso = flex.double([iph.miller_array_iso.data()[pair[0]] for pair in iso_pairs])
            I_merge_match_iso = flex.double([I_bin[pair[1]] for pair in iso_pairs])
            n_refl_cciso_bin = len(iso_pairs)
            if n_refl_cciso_bin > 0:
                cc_iso_bin = np.corrcoef(I_merge_match_iso, I_iso)[0, 1]
            if iph.flag_plot:
                plt.scatter(I_iso, I_merge_match_iso, s=10, marker="x", c="r")
                plt.title(
                    "bin %3.0f CC=%.4g meanI=%.4g std=%.4g sqrt_meanI=%.4g mul=%.4g"
                    % (
                        i,
                        cc_iso_bin,
                        np.mean(I_merge_match_iso),
                        np.std(I_merge_match_iso),
                        math.sqrt(np.mean(I_merge_match_iso)),
                        math.sqrt(np.mean(I_merge_match_iso)) * 2.5,
                    )
                )
                plt.xlabel("I_ref")
                plt.ylabel("I_obs")
                plt.show()

        txt_out += "%02d %7.2f - %7.2f %5.1f %6.0f / %6.0f %7.2f %7.2f %7.2f %7.2f %6.0f %7.2f %6.0f %7.2f" % (
            i,
            binner_template_asu.bin_d_range(i)[0],
            binner_template_asu.bin_d_range(i)[1],
            completeness * 100,
            n_obs_bin,
            n_template_bin,
            multiplicity_bin,
            r_meas_bin * 100,
            r_meas_w_bin * 100,
            cc12_bin * 100,
            n_refl_cc12_bin,
            cc_iso_bin * 100,
            n_refl_cciso_bin,
            mean_i_over_sigi_bin,
        )
        txt_out += "\n"
        # The CSV column divides by the number of observed reflections, which
        # is zero for an empty bin; report 0 there instead of raising
        # ZeroDivisionError.
        # NOTE(review): the text table prints "completeness, n_obs / n_template"
        # at this position; the division here may have been meant as a column
        # separator.  Column count is left unchanged - confirm intent.
        if n_obs_bin > 0:
            csv_completeness = completeness * 100 / n_obs_bin
        else:
            csv_completeness = 0
        csv_out += "%02d, %7.2f, %7.2f, %5.1f, %6.0f, %7.2f, %7.2f, %7.2f, %7.2f, %6.0f, %7.2f, %6.0f, %7.2f\n" % (
            i,
            binner_template_asu.bin_d_range(i)[0],
            binner_template_asu.bin_d_range(i)[1],
            csv_completeness,
            n_template_bin,
            multiplicity_bin,
            r_meas_bin * 100,
            r_meas_w_bin * 100,
            cc12_bin * 100,
            n_refl_cc12_bin,
            cc_iso_bin * 100,
            n_refl_cciso_bin,
            mean_i_over_sigi_bin,
        )

    # calculate CCiso
    cc_iso = 0
    n_refl_iso = 0
    if iph.file_name_iso_mtz != "":
        matches_iso = miller.match_multi_indices(
            miller_indices_unique=iph.miller_array_iso.indices(), miller_indices=miller_array_merge.indices()
        )
        iso_pairs = matches_iso.pairs()
        I_iso = flex.double([iph.miller_array_iso.data()[pair[0]] for pair in iso_pairs])
        I_merge_match_iso = flex.double([miller_array_merge.data()[pair[1]] for pair in iso_pairs])
        if len(iso_pairs) > 0:
            cc_iso = np.corrcoef(I_merge_match_iso, I_iso)[0, 1]
            n_refl_iso = len(iso_pairs)
        if iph.flag_plot:
            plt.scatter(I_iso, I_merge_match_iso, s=10, marker="x", c="r")
            plt.title("CC=%.4g" % (cc_iso))
            plt.xlabel("I_ref")
            plt.ylabel("I_obs")
            plt.show()

    # calculate cc12
    i_even_filter_sel = I_even_filter > 0
    cc12 = np.corrcoef(I_even_filter.select(i_even_filter_sel), I_odd_filter.select(i_even_filter_sel))[0, 1]

    # calculate Qmeas and Qw
    if sum_r_meas_w_btm > 0:
        r_meas_w = sum_r_meas_w_top / sum_r_meas_w_btm
    else:
        r_meas_w = float("Inf")
    if sum_r_meas_btm > 0:
        r_meas = sum_r_meas_top / sum_r_meas_btm
    else:
        r_meas = float("Inf")

    txt_out += rule
    txt_out += " TOTAL %5.1f %6.0f / %6.0f %7.2f %7.2f %7.2f %7.2f %6.0f %7.2f %6.0f %7.2f\n" % (
        (len(miller_array_merge.indices()) / len(miller_array_template_asu.indices())) * 100,
        len(miller_array_merge.indices()),
        len(miller_array_template_asu.indices()),
        len(miller_indices_all) / len(miller_array_merge.data()),
        r_meas * 100,
        r_meas_w * 100,
        cc12 * 100,
        len(I_even_filter.select(i_even_filter_sel)),
        cc_iso * 100,
        n_refl_iso,
        np.mean(miller_array_merge.data() / miller_array_merge.sigmas()),
    )
    txt_out += rule
    txt_out += "No. of total observed reflections: %9.0f from %5.0f frames" % (
        len(miller_indices_all),
        cn_good_frame,
    )
    txt_out += "\n"
    txt_out += (
        "No. of discarded frames - initial unit cell exceeds the limit: %5.0f frames; C.C. too low: %5.0f"
        % (cn_bad_frame_uc, cn_bad_frame_cc)
    )
    txt_out += "\n"
    txt_out += "Average unit-cell parameters: (%6.2f, %6.2f, %6.2f %6.2f, %6.2f, %6.2f)" % (
        uc_mean[0],
        uc_mean[1],
        uc_mean[2],
        uc_mean[3],
        uc_mean[4],
        uc_mean[5],
    )
    txt_out += "\n"
    print(txt_out)
    return miller_array_merge, txt_out, csv_out
def postrefine_by_frame(self, frame_no, pickle_filename, iparams, miller_array_ref, pres_in, avg_mode):
    """Post-refine a single frame against a reference intensity set.

    Parameters:
      frame_no: frame identifier, stored unchanged in the returned result.
      pickle_filename: path of the integration pickle.  It must provide the
        keys ``"current_orientation"`` and ``"wavelength"``.
      iparams: parameter object forwarded to the helpers; this method itself
        reads ``partiality_model``, ``flag_beam_divergence`` and
        ``merge.partiality_min``.
      miller_array_ref: reference array; only observations whose index also
        occurs in it take part in the refinement.
      pres_in: result of a previous cycle or ``None``.  When given, its
        ``crystal_orientation`` replaces the orientation from the pickle for
        the final partiality calculation.
      avg_mode: forwarded to ``self.organize_input``.

    Returns:
      ``(pres, text)``.  ``pres`` is a ``postref_results`` object, or ``None``
      when ``organize_input`` yields no inputs or the optimiser raises; in
      those cases ``text`` carries the reason.  On success ``text`` is the
      summary line that is also printed.
    """
    # 1. Prepare data
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this on integration pickles from a trusted source.
    # The ``with`` block closes the handle deterministically; the previous
    # ``pickle.load(open(...))`` left that to garbage collection.
    with open(pickle_filename, "rb") as pickle_file:
        observations_pickle = pickle.load(pickle_file)
    crystal_init_orientation = observations_pickle["current_orientation"][0]
    wavelength = observations_pickle["wavelength"]
    img_filename_only = pickle_filename.split("/")[-1]
    txt_exception = " {0:40} ==> ".format(img_filename_only)
    inputs, txt_organize_input = self.organize_input(
        observations_pickle, iparams, avg_mode, pickle_filename=pickle_filename
    )
    if inputs is None:
        txt_exception += txt_organize_input + "\n"
        return None, txt_exception
    observations_original, alpha_angle, spot_pred_x_mm, spot_pred_y_mm, detector_distance_mm = inputs

    # 2. Determine polarity - always do this even if flag_polar = False
    # the function will take care of it.
    polar_hkl, cc_iso_raw_asu, cc_iso_raw_rev = self.determine_polar(
        observations_original, iparams, pickle_filename, pres=pres_in
    )

    # 3. Select data for post-refinement (only select indices that are common
    # with the reference set)
    observations_non_polar = self.get_observations_non_polar(observations_original, polar_hkl)
    matches = miller.match_multi_indices(
        miller_indices_unique=miller_array_ref.indices(), miller_indices=observations_non_polar.indices()
    )
    # pair[0] indexes the reference array, pair[1] the observations.
    pairs = matches.pairs()
    I_ref_match = flex.double([miller_array_ref.data()[pair[0]] for pair in pairs])
    miller_indices_ref_match = flex.miller_index((miller_array_ref.indices()[pair[0]] for pair in pairs))
    I_obs_match = flex.double([observations_non_polar.data()[pair[1]] for pair in pairs])
    sigI_obs_match = flex.double([observations_non_polar.sigmas()[pair[1]] for pair in pairs])
    miller_indices_original_obs_match = flex.miller_index(
        (observations_original.indices()[pair[1]] for pair in pairs)
    )
    miller_indices_non_polar_obs_match = flex.miller_index(
        (observations_non_polar.indices()[pair[1]] for pair in pairs)
    )
    alpha_angle_set = flex.double([alpha_angle[pair[1]] for pair in pairs])
    spot_pred_x_mm_set = flex.double([spot_pred_x_mm[pair[1]] for pair in pairs])
    spot_pred_y_mm_set = flex.double([spot_pred_y_mm[pair[1]] for pair in pairs])
    # NOTE(review): references_sel is not read again in this method.
    references_sel = miller_array_ref.customized_copy(data=I_ref_match, indices=miller_indices_ref_match)
    observations_original_sel = observations_original.customized_copy(
        data=I_obs_match, sigmas=sigI_obs_match, indices=miller_indices_original_obs_match
    )
    observations_non_polar_sel = observations_non_polar.customized_copy(
        data=I_obs_match, sigmas=sigI_obs_match, indices=miller_indices_non_polar_obs_match
    )

    # 4. Do least-squares refinement
    lsqrh = leastsqr_handler()
    try:
        refined_params, stats, n_refl_postrefined = lsqrh.optimize(
            I_ref_match,
            observations_original_sel,
            wavelength,
            crystal_init_orientation,
            alpha_angle_set,
            spot_pred_x_mm_set,
            spot_pred_y_mm_set,
            iparams,
            pres_in,
            observations_non_polar_sel,
            detector_distance_mm,
        )
    except Exception:
        # Any optimiser failure rejects the frame instead of aborting the run.
        txt_exception += "optimization failed.\n"
        return None, txt_exception

    # calculate partiality for output (with target_anomalous check)
    (
        G_fin,
        B_fin,
        rotx_fin,
        roty_fin,
        ry_fin,
        rz_fin,
        r0_fin,
        re_fin,
        a_fin,
        b_fin,
        c_fin,
        alpha_fin,
        beta_fin,
        gamma_fin,
    ) = refined_params
    # Re-read the complete observation set: the final partialities are
    # computed for all observations, not only those matched to the reference.
    inputs, txt_organize_input = self.organize_input(
        observations_pickle, iparams, avg_mode, pickle_filename=pickle_filename
    )
    observations_original, alpha_angle, spot_pred_x_mm, spot_pred_y_mm, detector_distance_mm = inputs
    observations_non_polar = self.get_observations_non_polar(observations_original, polar_hkl)
    from cctbx.uctbx import unit_cell

    uc_fin = unit_cell((a_fin, b_fin, c_fin, alpha_fin, beta_fin, gamma_fin))
    if pres_in is not None:
        crystal_init_orientation = pres_in.crystal_orientation
    two_theta = observations_original.two_theta(wavelength=wavelength).data()
    from mod_leastsqr import calc_partiality_anisotropy_set

    partiality_fin, dummy, rs_fin, rh_fin = calc_partiality_anisotropy_set(
        uc_fin,
        rotx_fin,
        roty_fin,
        observations_original.indices(),
        ry_fin,
        rz_fin,
        r0_fin,
        re_fin,
        two_theta,
        alpha_angle,
        wavelength,
        crystal_init_orientation,
        spot_pred_x_mm,
        spot_pred_y_mm,
        detector_distance_mm,
        iparams.partiality_model,
        iparams.flag_beam_divergence,
    )

    # calculate the new crystal orientation
    O = sqr(uc_fin.orthogonalization_matrix()).transpose()
    R = sqr(crystal_init_orientation.crystal_rotation_matrix()).transpose()
    from cctbx.crystal_orientation import crystal_orientation, basis_type

    CO = crystal_orientation(O * R, basis_type.direct)
    crystal_fin_orientation = CO.rotate_thru((1, 0, 0), rotx_fin).rotate_thru((0, 1, 0), roty_fin)

    # remove reflections with partiality below threshold
    i_sel = partiality_fin > iparams.merge.partiality_min
    partiality_fin_sel = partiality_fin.select(i_sel)
    rs_fin_sel = rs_fin.select(i_sel)
    rh_fin_sel = rh_fin.select(i_sel)
    observations_non_polar_sel = observations_non_polar.customized_copy(
        indices=observations_non_polar.indices().select(i_sel),
        data=observations_non_polar.data().select(i_sel),
        sigmas=observations_non_polar.sigmas().select(i_sel),
    )
    observations_original_sel = observations_original.customized_copy(
        indices=observations_original.indices().select(i_sel),
        data=observations_original.data().select(i_sel),
        sigmas=observations_original.sigmas().select(i_sel),
    )

    pres = postref_results()
    pres.set_params(
        observations=observations_non_polar_sel,
        observations_original=observations_original_sel,
        refined_params=refined_params,
        stats=stats,
        partiality=partiality_fin_sel,
        rs_set=rs_fin_sel,
        rh_set=rh_fin_sel,
        frame_no=frame_no,
        pickle_filename=pickle_filename,
        wavelength=wavelength,
        crystal_orientation=crystal_fin_orientation,
        detector_distance_mm=detector_distance_mm,
    )

    # Percentage changes are reported on a best-effort basis: any value that
    # cannot be computed (for example a zero initial value) keeps its 0
    # default, as do the values after it.
    r_change, r_xy_change, cc_change, cc_iso_change = (0, 0, 0, 0)
    try:
        r_change = ((pres.R_final - pres.R_init) / pres.R_init) * 100
        r_xy_change = ((pres.R_xy_final - pres.R_xy_init) / pres.R_xy_init) * 100
        cc_change = ((pres.CC_final - pres.CC_init) / pres.CC_init) * 100
        cc_iso_change = ((pres.CC_iso_final - pres.CC_iso_init) / pres.CC_iso_init) * 100
    except Exception:
        pass

    txt_postref = " {0:40} ==> RES:{1:5.2f} NREFL:{2:5d} R:{3:8.2f}% RXY:{4:8.2f}% CC:{5:6.2f}% CCISO:{6:6.2f}% G:{7:10.3e} B:{8:7.1f} CELL:{9:6.2f} {10:6.2f} {11:6.2f} {12:6.2f} {13:6.2f} {14:6.2f}".format(
        img_filename_only + " (" + polar_hkl + ")",
        observations_original_sel.d_min(),
        len(observations_original_sel.data()),
        r_change,
        r_xy_change,
        cc_change,
        cc_iso_change,
        pres.G,
        pres.B,
        a_fin,
        b_fin,
        c_fin,
        alpha_fin,
        beta_fin,
        gamma_fin,
    )
    # Single-argument print(): same output under Python 2 and Python 3.
    print(txt_postref)
    txt_postref += "\n"
    return pres, txt_postref
def combine_pre_merge(self, result, iparams):
    """Pool the pre-merge outputs of all batches and label reflection groups.

    ``result`` is an iterable of batches; every batch is an iterable of
    17-element records.  The per-reflection columns of all records are
    concatenated, sorted with the "packed_indices" permutation of the pooled
    intensity array and matched against the complete unique set to obtain a
    group id per observation.

    Returns a 17-tuple: the number of distinct group ids, the group id list,
    the eleven sorted columns (indices, original indices, I, sigI, G, B, p,
    rs, wavelength, sin, SE), the mean unit cell, the mean wavelength, the
    sorted pickle file names and an empty string.
    """
    # Accumulators in the order in which the columns are returned.
    mi_pool = flex.miller_index()
    mio_pool = flex.miller_index()
    I_pool = flex.double()
    sigI_pool = flex.double()
    G_pool = flex.double()
    B_pool = flex.double()
    p_pool = flex.double()
    rs_pool = flex.double()
    wavelength_pool = flex.double()
    sin_pool = flex.double()
    SE_pool = flex.double()
    columns = [
        mi_pool,
        mio_pool,
        I_pool,
        sigI_pool,
        G_pool,
        B_pool,
        p_pool,
        rs_pool,
        wavelength_pool,
        sin_pool,
        SE_pool,
    ]
    cell_values = []
    frame_wavelengths = []
    pickle_names = flex.std_string()

    for batch in result:
        for record in batch:
            (
                _,
                _,
                mi,
                mio,
                I,
                sigI,
                G,
                B,
                p,
                rs,
                wavelength,
                sin,
                SE,
                uc_mean,
                wavelength_mean,
                pickle_filename_set,
                txt_out,
            ) = record
            chunks = (mi, mio, I, sigI, G, B, p, rs, wavelength, sin, SE)
            for column, chunk in zip(columns, chunks):
                column.extend(chunk)
            cell_values.extend(uc_mean)
            frame_wavelengths.append(wavelength_mean)
            pickle_names.extend(pickle_filename_set)

    # Six cell parameters per record -> column-wise mean.
    uc_mean = np.mean(np.array(cell_values).reshape(-1, 6), axis=0)
    wavelength_mean = np.mean(frame_wavelengths)

    symmetry = crystal.symmetry(unit_cell=tuple(uc_mean), space_group_symbol=iparams.target_space_group)
    ms_template = symmetry.build_miller_set(
        anomalous_flag=iparams.target_anomalous_flag, d_min=iparams.merge.d_min
    )
    ma_all = ms_template.array().customized_copy(indices=mi_pool, data=I_pool, sigmas=sigI_pool)

    # sort reflections according to asymmetric-unit symmetry hkl
    perm = ma_all.sort_permutation(by_value="packed_indices")
    sorted_columns = [column.select(perm) for column in columns]
    sorted_pickle_names = pickle_names.select(perm)

    ma_uniq = ma_all.merge_equivalents().array().complete_array(
        d_min=iparams.merge.d_min, d_max=iparams.merge.d_max
    )
    matches_uniq = miller.match_multi_indices(
        miller_indices_unique=ma_uniq.indices(), miller_indices=sorted_columns[0]
    )
    pairs = matches_uniq.pairs()
    pair_0 = flex.int([pair[0] for pair in pairs])
    pair_1 = flex.int([pair[1] for pair in pairs])
    group_id_list = flex.int([pair_0[pair_1[k]] for k in range(len(pairs))])
    # Number of distinct group ids.
    cn_group = len(Counter(group_id_list))

    return tuple(
        [cn_group, group_id_list]
        + sorted_columns
        + [uc_mean, wavelength_mean, sorted_pickle_names, ""]
    )
def scale_frame_detail(self, result, file_name, db_mgr, out):
    """Correct, filter and accumulate the observations of one frame.

    The steps visible in this method are: pick a wavelength, apply a
    polarization correction, map the observations onto the symmetry of
    self.miller_set, optionally cut the resolution with a bin-wise I/sigma
    filter, match the observations against self.miller_set, store the frame
    and its observations through db_mgr, and sum the intensities into a
    frame_data object.

    Parameters:
      result: dictionary of one integration result.  Keys read here:
        "wavelength" (optional), "observations", "cos_two_polar_angle",
        "model_partialities" (optional), "xbeam", "ybeam", "distance",
        "current_orientation", "ML_half_mosaicity_deg",
        "ML_domain_size_ang" and "mapped_predictions".
      file_name: name used for the frame_data / null_data objects.
      db_mgr: object providing insert_frame() and insert_observation().
      out: log stream; it is written with "print >>" and read back with
        out.getvalue().

    Returns:
      None when no wavelength is available; a null_data object when the
      significance filter accepts no bin or when no matched reflection
      survives; otherwise the populated frame_data object.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source - confirm against the original file.
    """
    # If the pickled integration file does not contain a wavelength,
    # fall back on the value given on the command line. XXX The
    # wavelength parameter should probably be removed from master_phil
    # once all pickled integration files contain it.
    if (result.has_key("wavelength")):
        wavelength = result["wavelength"]
    elif (self.params.wavelength is not None):
        wavelength = self.params.wavelength
    else:
        # XXX Give error, or raise exception?
        return None
    assert (wavelength > 0)

    observations = result["observations"][0]
    cos_two_polar_angle = result["cos_two_polar_angle"]

    assert observations.size() == cos_two_polar_angle.size()
    tt_vec = observations.two_theta(wavelength)
    #print "mean tt degrees",180.*flex.mean(tt_vec.data())/math.pi
    cos_tt_vec = flex.cos( tt_vec.data() )
    sin_tt_vec = flex.sin( tt_vec.data() )
    cos_sq_tt_vec = cos_tt_vec * cos_tt_vec
    sin_sq_tt_vec = sin_tt_vec * sin_tt_vec
    P_nought_vec = 0.5 * (1. + cos_sq_tt_vec)

    F_prime = -1.0 # Hard-coded value defines the incident polarization axis
    P_prime = 0.5 * F_prime * cos_two_polar_angle * sin_sq_tt_vec

    # XXX added as a diagnostic
    # Compares the correction factor for F_prime = -1.0 with the one for
    # F_prime = +1.0 and prints mean/min/max of the absolute difference.
    prange=P_nought_vec - P_prime

    other_F_prime = 1.0
    otherP_prime = 0.5 * other_F_prime * cos_two_polar_angle * sin_sq_tt_vec
    otherprange=P_nought_vec - otherP_prime
    diff2 = flex.abs(prange - otherprange)
    print "mean diff is",flex.mean(diff2), "range",flex.min(diff2), flex.max(diff2)
    # XXX done
    observations = observations / ( P_nought_vec - P_prime )
    # This corrects observations for polarization assuming 100% polarization on
    # one axis (thus the F_prime = -1.0 rather than the perpendicular axis, 1.0)
    # Polarization model as described by Kahn, Fourme, Gadet, Janin, Dumas & Andre
    # (1982) J. Appl. Cryst. 15, 330-337, equations 13 - 15.

    print "Step 3. Correct for polarization."
    indexed_cell = observations.unit_cell()

    observations_original_index = observations.deep_copy()
    if result.get("model_partialities",None) is not None and result["model_partialities"][0] is not None:
        # some recordkeeping useful for simulations
        # NOTE(review): partialities_original_index is not read again in this
        # method.
        partialities_original_index = observations.customized_copy(
            crystal_symmetry=self.miller_set.crystal_symmetry(),
            data = result["model_partialities"][0]["data"],
            sigmas = flex.double(result["model_partialities"][0]["data"].size()), #dummy value for sigmas
            indices = result["model_partialities"][0]["indices"],
            ).resolution_filter(d_min=self.params.d_min)

    assert len(observations_original_index.indices()) == len(observations.indices())

    # Now manipulate the data to conform to unit cell, asu, and space group
    # of reference. The resolution will be cut later.
    # Only works if there is NOT an indexing ambiguity!
    observations = observations.customized_copy(
        anomalous_flag=not self.params.merge_anomalous,
        crystal_symmetry=self.miller_set.crystal_symmetry()
        ).map_to_asu()

    # The copy in the original indexing gets the same symmetry but is not
    # mapped to the asymmetric unit.
    observations_original_index = observations_original_index.customized_copy(
        anomalous_flag=not self.params.merge_anomalous,
        crystal_symmetry=self.miller_set.crystal_symmetry()
        )
    print "Step 4. Filter on global resolution and map to asu"
    print >> out, "Data in reference setting:"
    #observations.show_summary(f=out, prefix=" ")
    show_observations(observations, out=out)

    #if self.params.significance_filter.apply is True:
    # raise Exception("significance filter not implemented in samosa")
    if self.params.significance_filter.apply is True: #------------------------------------
        # Apply an I/sigma filter ... accept resolution bins only if they
        # have significant signal; tends to screen out higher resolution observations
        # if the integration model doesn't quite fit
        N_obs_pre_filter = observations.size()
        N_bins_small_set = N_obs_pre_filter // self.params.significance_filter.min_ct
        N_bins_large_set = N_obs_pre_filter // self.params.significance_filter.max_ct

        # Ensure there is at least one bin.
        N_bins = max(
            [min([self.params.significance_filter.n_bins,N_bins_small_set]),
             N_bins_large_set, 1]
        )
        print "Total obs %d Choose n bins = %d"%(N_obs_pre_filter,N_bins)
        bin_results = show_observations(observations, out=out, n_bins=N_bins)
        #show_observations(observations, out=sys.stdout, n_bins=N_bins)
        acceptable_resolution_bins = [
            bin.mean_I_sigI > self.params.significance_filter.sigma for bin in bin_results]
        # Index i is kept only while every bin up to and including i passes,
        # i.e. the accepted bins form an unbroken run starting at the first bin.
        acceptable_nested_bin_sequences = [i for i in xrange(len(acceptable_resolution_bins))
                                           if False not in acceptable_resolution_bins[:i+1]]
        if len(acceptable_nested_bin_sequences)==0:
            return null_data(
                file_name=file_name, log_out=out.getvalue(), low_signal=True)
        else:
            N_acceptable_bins = max(acceptable_nested_bin_sequences) + 1
            # The new d_min is the third whitespace-separated token of the last
            # accepted bin's d_range string.
            imposed_res_filter = float(bin_results[N_acceptable_bins-1].d_range.split()[2])
            imposed_res_sel = observations.resolution_filter_selection(
                d_min=imposed_res_filter)
            observations = observations.select(
                imposed_res_sel)
            observations_original_index = observations_original_index.select(
                imposed_res_sel)
            print "New resolution filter at %7.2f"%imposed_res_filter,file_name
        print "N acceptable bins",N_acceptable_bins
        print "Old n_obs: %d, new n_obs: %d"%(N_obs_pre_filter,observations.size())
        print "Step 5. Frame by frame resolution filter"
        # Finished applying the binwise I/sigma filter---------------------------------------
    if self.params.raw_data.sdfac_auto is True:
        raise Exception("sdfac auto not implemented in samosa.")

    print "Step 6. Match to reference intensities, filter by correlation, filter out negative intensities."
    assert len(observations_original_index.indices()) \
        == len(observations.indices())

    data = frame_data(self.n_refl, file_name)
    data.set_indexed_cell(indexed_cell)
    data.d_min = observations.d_min()

    # Ensure that match_multi_indices() will return identical results
    # when a frame's observations are matched against the
    # pre-generated Miller set, self.miller_set, and the reference
    # data set, self.i_model. The implication is that the same match
    # can be used to map Miller indices to array indices for intensity
    # accumulation, and for determination of the correlation
    # coefficient in the presence of a scaling reference.
    if self.i_model is not None:
        assert len(self.i_model.indices()) == len(self.miller_set.indices()) \
            and (self.i_model.indices() == self.miller_set.indices()).count(False) == 0

    # In each pair, pair[0] indexes self.miller_set and pair[1] indexes the
    # frame's observations.
    matches = miller.match_multi_indices(
        miller_indices_unique=self.miller_set.indices(),
        miller_indices=observations.indices())

    # NOTE(review): use_weights is not read again in this method.
    use_weights = False # New facility for getting variance-weighted correlation
    if self.params.scaling.algorithm in ['mark1','levmar']:
        # Because no correlation is computed, the correlation
        # coefficient is fixed at zero. Setting slope = 1 means
        # intensities are added without applying a scale factor.
        # NOTE(review): sum_x is never incremented, so the "average calc"
        # value logged further down is always 0; sum_x and sum_y exist only
        # when this branch runs.
        sum_x = 0
        sum_y = 0
        for pair in matches.pairs():
            data.n_obs += 1
            if not self.params.include_negatives and observations.data()[pair[1]] <= 0:
                data.n_rejected += 1
            else:
                sum_y += observations.data()[pair[1]]
        # NOTE(review): N is not read again in this method.
        N = data.n_obs - data.n_rejected

    # Early return if there are no positive reflections on the frame.
    if data.n_obs <= data.n_rejected:
        return null_data(
            file_name=file_name, log_out=out.getvalue(), low_signal=True)

    # Update the count for each matched reflection. This counts
    # reflections with non-positive intensities, too.
    data.completeness += matches.number_of_matches(0).as_int()
    data.wavelength = wavelength

    if not self.params.scaling.enable: # Do not scale anything
        print "Scale factor to an isomorphous reference PDB will NOT be applied."
        # NOTE(review): slope and offset are not read again in this method.
        slope = 1.0
        offset = 0.0

    observations_original_index_indices = observations_original_index.indices()
    # NOTE(review): neither unpack nor MINI is defined inside this method;
    # unless both exist at module level this exit raises NameError - confirm.
    if db_mgr is None: return unpack(MINI.x) # special exit for two-color indexing

    # Frame-level record: beam geometry plus the nine elements of the
    # reciprocal-space orientation matrix.
    kwargs = {'wavelength': wavelength,
              'beam_x': result['xbeam'],
              'beam_y': result['ybeam'],
              'distance': result['distance'],
              'unique_file_name': data.file_name}

    ORI = result["current_orientation"][0]
    Astar = matrix.sqr(ORI.reciprocal_matrix())

    kwargs['res_ori_1'] = Astar[0]
    kwargs['res_ori_2'] = Astar[1]
    kwargs['res_ori_3'] = Astar[2]
    kwargs['res_ori_4'] = Astar[3]
    kwargs['res_ori_5'] = Astar[4]
    kwargs['res_ori_6'] = Astar[5]
    kwargs['res_ori_7'] = Astar[6]
    kwargs['res_ori_8'] = Astar[7]
    kwargs['res_ori_9'] = Astar[8]
    assert self.params.scaling.report_ML is True
    kwargs['half_mosaicity_deg'] = result["ML_half_mosaicity_deg"][0]
    kwargs['domain_size_ang'] = result["ML_domain_size_ang"][0]

    frame_id_0_base = db_mgr.insert_frame(**kwargs)

    xypred = result["mapped_predictions"][0]
    # Observation-side positions of all matched reflections; the same
    # positions select intensities, original indices and predicted spots.
    indices = flex.size_t([pair[1] for pair in matches.pairs()])

    sel_observations = flex.intersection(
        size=observations.data().size(),
        iselections=[indices])
    set_original_hkl = observations_original_index_indices.select(
        flex.intersection(
            size=observations_original_index_indices.size(),
            iselections=[indices]))
    set_xypred = xypred.select(
        flex.intersection(
            size=xypred.size(),
            iselections=[indices]))

    # Observation-level records for the frame that was just inserted.
    kwargs = {'hkl_id_0_base': [pair[0] for pair in matches.pairs()],
              'i': observations.data().select(sel_observations),
              'sigi': observations.sigmas().select(sel_observations),
              'detector_x': [xy[0] for xy in set_xypred],
              'detector_y': [xy[1] for xy in set_xypred],
              'frame_id_0_base': [frame_id_0_base] * len(matches.pairs()),
              'overload_flag': [0] * len(matches.pairs()),
              'original_h': [hkl[0] for hkl in set_original_hkl],
              'original_k': [hkl[1] for hkl in set_original_hkl],
              'original_l': [hkl[2] for hkl in set_original_hkl]}

    db_mgr.insert_observation(**kwargs)

    print >> out, "Lattice: %d reflections" % (data.n_obs - data.n_rejected)
    print >> out, "average obs", sum_y / (data.n_obs - data.n_rejected), \
        "average calc", sum_x / (data.n_obs - data.n_rejected)
    print >> out, "Rejected %d reflections with negative intensities" % \
        data.n_rejected

    data.accept = True
    for pair in matches.pairs():
        if not self.params.include_negatives and (observations.data()[pair[1]] <= 0) :
            continue
        Intensity = observations.data()[pair[1]]
        # Super-rare exception. If saved sigmas instead of I/sigmas in the ISIGI dict, this wouldn't be needed.
        if Intensity == 0:
            continue

        # Add the reflection as a two-tuple of intensity and I/sig(I)
        # to the dictionary of observations.
        # NOTE(review): the tuple stored below has three elements (the third
        # is the constant 1.0), not two.
        index = self.miller_set.indices()[pair[0]]
        isigi = (Intensity,
                 observations.data()[pair[1]] / observations.sigmas()[pair[1]],
                 1.0)
        if index in data.ISIGI:
            data.ISIGI[index].append(isigi)
        else:
            data.ISIGI[index] = [isigi]

        # Accumulate inverse-variance weighted sums per unique Miller index.
        sigma = observations.sigmas()[pair[1]]
        variance = sigma * sigma
        data.summed_N[pair[0]] += 1
        data.summed_wt_I[pair[0]] += Intensity / variance
        data.summed_weight[pair[0]] += 1 / variance

    data.set_log_out(out.getvalue())
    return data
def prepare_output(self, results, iparams, avg_mode):
    """Screen per-frame post-refinement results and build arrays for merging.

    Every non-None entry of ``results`` is tested, in this order, for
    NaN ``G``, NaN/inf ``SE``, deviation of ``SE`` from the median, final CC
    below the threshold, deviation of ``G`` and of ``re`` (gamma_e) from
    their medians, and ``good_unit_cell``. Deviations are measured in units
    of the corresponding standard deviation and compared with
    ``iparams.sigma_rejection``. Observations of the accepted frames are
    concatenated, sorted by packed Miller index and matched against the
    unique, completed set of merged indices.

    Parameters:
      results -- iterable of per-frame results; ``None`` entries are skipped.
      iparams -- parameter object (reads ``frame_accept_min_cc``,
        ``sigma_rejection``, ``flag_weak_anomalous``,
        ``target_anomalous_flag``, ``merge.*``, ``target_space_group``,
        ``flag_hush`` and ``run_no``).
      avg_mode -- ``'average'`` sets the CC threshold to 0; with
        ``iparams.flag_weak_anomalous`` set, only ``'final'`` keeps the
        requested anomalous flag (otherwise it is forced to False).

    Side effects: calls ``self.plot_stats``; unless ``iparams.flag_hush`` is
    set, writes ``<run_no>/crystal.o`` (pickled orientation dictionary) and
    calls ``self.write_stat_pickle``.

    Returns a 17-tuple:
      (cn_group, group_id_list, sorted miller indices, sorted original
      miller indices, sorted I, sorted sigI, sorted G, sorted B, sorted
      partiality, sorted rs, sorted wavelength, sorted sin(theta)/lambda^2,
      sorted SE, mean unit cell, mean wavelength, sorted pickle file names,
      summary text).
    """
    cc_thres = 0 if avg_mode == 'average' else iparams.frame_accept_min_cc
    std_filter = iparams.sigma_rejection
    if iparams.flag_weak_anomalous and avg_mode != 'final':
        target_anomalous_flag = False
    else:
        target_anomalous_flag = iparams.target_anomalous_flag

    # Statistics over ALL results drive the outlier rejection below; they are
    # recomputed over the accepted frames further down.
    pr_params_mean, pr_params_med, pr_params_std = \
        self.calc_mean_postref_parameters(results)
    G_med, B_med, ry_med, rz_med, re_med, r0_med, voigt_nu_med, rotx_med, \
        roty_med, R_med, R_xy_med, SE_med = pr_params_med
    G_std, B_std, ry_std, rz_std, re_std, r0_std, voigt_nu_std, rotx_std, \
        roty_std, R_std, R_xy_std, SE_std = pr_params_std

    # prepare data for merging
    miller_indices_all = flex.miller_index()
    miller_indices_ori_all = flex.miller_index()
    I_all = flex.double()
    sigI_all = flex.double()
    G_all = flex.double()
    B_all = flex.double()
    p_all = flex.double()
    rs_all = flex.double()
    rh_all = flex.double()
    SE_all = flex.double()
    sin_sq_all = flex.double()
    wavelength_all = flex.double()
    detector_distance_set = flex.double()
    R_init_all = flex.double()
    R_final_all = flex.double()
    R_xy_init_all = flex.double()
    R_xy_final_all = flex.double()
    pickle_filename_all = flex.std_string()
    filtered_results = []
    cn_good_frame = 0
    cn_bad_frame_SE = 0
    cn_bad_frame_uc = 0
    cn_bad_frame_cc = 0
    cn_bad_frame_G = 0
    cn_bad_frame_re = 0
    crystal_orientation_dict = {}

    for pres in results:
        if pres is None:
            continue
        flag_pres_ok = True
        # check SE, CC, UC, G, B, gamma_e
        # NaN/inf frames are rejected without being counted in any
        # of the cn_bad_frame_* tallies.
        if math.isnan(pres.G):
            flag_pres_ok = False
        if math.isnan(pres.SE) or np.isinf(pres.SE):
            flag_pres_ok = False
        if flag_pres_ok and SE_std > 0:
            if abs(pres.SE - SE_med) / SE_std > std_filter:
                flag_pres_ok = False
                cn_bad_frame_SE += 1
        if flag_pres_ok and pres.CC_final < cc_thres:
            flag_pres_ok = False
            cn_bad_frame_cc += 1
        if flag_pres_ok and G_std > 0:
            if abs(pres.G - G_med) / G_std > std_filter:
                flag_pres_ok = False
                cn_bad_frame_G += 1
        if flag_pres_ok and re_std > 0:
            if abs(pres.re - re_med) / re_std > std_filter:
                flag_pres_ok = False
                cn_bad_frame_re += 1
        if flag_pres_ok and not good_unit_cell(
                pres.uc_params, iparams, iparams.merge.uc_tolerance):
            flag_pres_ok = False
            cn_bad_frame_uc += 1
        if not flag_pres_ok:
            continue

        data_size = pres.observations.size()
        cn_good_frame += 1
        filtered_results.append(pres)
        R_init_all.append(pres.R_init)
        R_final_all.append(pres.R_final)
        R_xy_init_all.append(pres.R_xy_init)
        R_xy_final_all.append(pres.R_xy_final)
        miller_indices_all.extend(pres.observations.indices())
        miller_indices_ori_all.extend(pres.observations_original.indices())
        I_all.extend(pres.observations.data())
        sigI_all.extend(pres.observations.sigmas())
        # Per-frame scalars are repeated once per observation so that every
        # array stays aligned with the concatenated observations.
        G_all.extend(flex.double([pres.G] * data_size))
        B_all.extend(flex.double([pres.B] * data_size))
        p_all.extend(pres.partiality)
        rs_all.extend(pres.rs_set)
        rh_all.extend(pres.rh_set)
        sin_sq_all.extend(
            pres.observations.two_theta(wavelength=pres.wavelength)
            .sin_theta_over_lambda_sq().data())
        SE_all.extend(flex.double([pres.SE] * data_size))
        wavelength_all.extend(flex.double([pres.wavelength] * data_size))
        detector_distance_set.append(pres.detector_distance_mm)
        pickle_filename_all.extend(
            flex.std_string([pres.pickle_filename] * data_size))
        crystal_orientation_dict[pres.pickle_filename] = \
            pres.crystal_orientation

    # plot stats
    self.plot_stats(filtered_results, iparams)

    # write out updated crystal orientation as a pickle file
    if not iparams.flag_hush:
        # The context manager guarantees the file is flushed and closed.
        with open(iparams.run_no + '/' + "crystal.o", "wb") as pickle_file:
            pickle.dump(crystal_orientation_dict, pickle_file,
                        pickle.HIGHEST_PROTOCOL)

    # calculate average unit cell
    uc_mean, uc_med, uc_std = self.calc_mean_unit_cell(filtered_results)

    # recalculate stats for pr parameters (accepted frames only)
    pr_params_mean, pr_params_med, pr_params_std = \
        self.calc_mean_postref_parameters(filtered_results)
    G_mean, B_mean, ry_mean, rz_mean, re_mean, r0_mean, voigt_nu_mean, \
        rotx_mean, roty_mean, R_mean, R_xy_mean, SE_mean = pr_params_mean
    G_med, B_med, ry_med, rz_med, re_med, r0_med, voigt_nu_med, rotx_med, \
        roty_med, R_med, R_xy_med, SE_med = pr_params_med
    G_std, B_std, ry_std, rz_std, re_std, r0_std, voigt_nu_std, rotx_std, \
        roty_std, R_std, R_xy_std, SE_std = pr_params_std

    # from all observations merge them
    crystal_symmetry = crystal.symmetry(
        unit_cell=tuple(uc_mean),
        space_group_symbol=iparams.target_space_group)
    miller_set_all = miller.set(crystal_symmetry=crystal_symmetry,
                                indices=miller_indices_all,
                                anomalous_flag=target_anomalous_flag)
    miller_array_all = miller_set_all.array(
        data=I_all, sigmas=sigI_all).set_observation_type_xray_intensity()

    # sort reflections according to asymmetric-unit symmetry hkl
    perm = miller_array_all.sort_permutation(by_value="packed_indices")
    miller_indices_all_sort = miller_array_all.indices().select(perm)
    miller_indices_ori_all_sort = miller_indices_ori_all.select(perm)
    I_obs_all_sort = miller_array_all.data().select(perm)
    sigI_obs_all_sort = miller_array_all.sigmas().select(perm)
    G_all_sort = G_all.select(perm)
    B_all_sort = B_all.select(perm)
    p_all_sort = p_all.select(perm)
    rs_all_sort = rs_all.select(perm)
    wavelength_all_sort = wavelength_all.select(perm)
    sin_sq_all_sort = sin_sq_all.select(perm)
    SE_all_sort = SE_all.select(perm)
    pickle_filename_all_sort = pickle_filename_all.select(perm)

    miller_array_uniq = miller_array_all.merge_equivalents().array(
    ).complete_array(d_min=iparams.merge.d_min, d_max=iparams.merge.d_max)
    matches_uniq = miller.match_multi_indices(
        miller_indices_unique=miller_array_uniq.indices(),
        miller_indices=miller_indices_all_sort)
    pairs = matches_uniq.pairs()
    pair_0 = [pair[0] for pair in pairs]
    # For the i-th pair, look up the first member of the pair found at
    # position pair[1]. NOTE(review): presumably pair[0] indexes the unique
    # set and pair[1] the sorted observations -- confirm against
    # match_multi_indices.
    group_id_list = flex.int([pair_0[pair[1]] for pair in pairs])
    cn_group = len(Counter(group_id_list))

    # prepare txt out stat
    txt_out = 'Summary of refinement and merging\n'
    txt_out += ' No. good frames: %12.0f\n' % (cn_good_frame)
    txt_out += ' No. bad cc frames: %12.0f\n' % (cn_bad_frame_cc)
    txt_out += ' No. bad G frames) : %12.0f\n' % (cn_bad_frame_G)
    txt_out += ' No. bad unit cell frames: %12.0f\n' % (cn_bad_frame_uc)
    txt_out += ' No. bad gamma_e frames: %12.0f\n' % (cn_bad_frame_re)
    txt_out += ' No. bad SE: %12.0f\n' % (cn_bad_frame_SE)
    txt_out += ' No. observations: %12.0f\n' % (len(I_obs_all_sort))
    txt_out += 'Mean target value (BEFORE: Mean Median (Std.))\n'
    txt_out += ' post-refinement: %12.2f %12.2f (%9.2f)\n' % (
        np.mean(R_init_all), np.median(R_init_all), np.std(R_init_all))
    txt_out += ' (x,y) restraints: %12.2f %12.2f (%9.2f)\n' % (
        np.mean(R_xy_init_all), np.median(R_xy_init_all),
        np.std(R_xy_init_all))
    txt_out += 'Mean target value (AFTER: Mean Median (Std.))\n'
    txt_out += ' post-refinement: %12.2f %12.2f (%9.2f)\n' % (
        np.mean(R_final_all), np.median(R_final_all), np.std(R_final_all))
    txt_out += ' (x,y) restraints: %12.2f %12.2f (%9.2f)\n' % (
        np.mean(R_xy_final_all), np.median(R_xy_final_all),
        np.std(R_xy_final_all))
    txt_out += ' SE: %12.2f %12.2f (%9.2f)\n' % (SE_mean, SE_med, SE_std)
    txt_out += ' G: %12.3e %12.3e (%9.2e)\n' % (G_mean, G_med, G_std)
    txt_out += ' B: %12.2f %12.2f (%9.2f)\n' % (B_mean, B_med, B_std)
    txt_out += ' Rot.x: %12.2f %12.2f (%9.2f)\n' % (
        rotx_mean * 180 / math.pi, rotx_med * 180 / math.pi,
        rotx_std * 180 / math.pi)
    txt_out += ' Rot.y: %12.2f %12.2f (%9.2f)\n' % (
        roty_mean * 180 / math.pi, roty_med * 180 / math.pi,
        roty_std * 180 / math.pi)
    txt_out += ' gamma_y: %12.5f %12.5f (%9.5f)\n' % (ry_mean, ry_med, ry_std)
    txt_out += ' gamma_z: %12.5f %12.5f (%9.5f)\n' % (rz_mean, rz_med, rz_std)
    txt_out += ' gamma_0: %12.5f %12.5f (%9.5f)\n' % (r0_mean, r0_med, r0_std)
    txt_out += ' gamma_e: %12.5f %12.5f (%9.5f)\n' % (re_mean, re_med, re_std)
    txt_out += ' voigt_nu: %12.5f %12.5f (%9.5f)\n' % (
        voigt_nu_mean, voigt_nu_med, voigt_nu_std)
    txt_out += ' unit cell\n'
    txt_out += ' a: %12.2f %12.2f (%9.2f)\n' % (
        uc_mean[0], uc_med[0], uc_std[0])
    txt_out += ' b: %12.2f %12.2f (%9.2f)\n' % (
        uc_mean[1], uc_med[1], uc_std[1])
    txt_out += ' c: %12.2f %12.2f (%9.2f)\n' % (
        uc_mean[2], uc_med[2], uc_std[2])
    txt_out += ' alpha: %12.2f %12.2f (%9.2f)\n' % (
        uc_mean[3], uc_med[3], uc_std[3])
    txt_out += ' beta: %12.2f %12.2f (%9.2f)\n' % (
        uc_mean[4], uc_med[4], uc_std[4])
    txt_out += ' gamma: %12.2f %12.2f (%9.2f)\n' % (
        uc_mean[5], uc_med[5], uc_std[5])
    txt_out += 'Parmeters from integration (not-refined)\n'
    txt_out += ' Wavelength: %12.5f %12.5f (%9.5f)\n' % (
        np.mean(wavelength_all), np.median(wavelength_all),
        np.std(wavelength_all))
    txt_out += ' Detector distance: %12.5f %12.5f (%9.5f)\n' % (
        np.mean(detector_distance_set), np.median(detector_distance_set),
        np.std(detector_distance_set))
    txt_out += '* (standard deviation)\n'

    # write out stat. pickle
    if not iparams.flag_hush:
        stat_dict = {
            "n_frames_good": [cn_good_frame],
            "n_frames_bad_cc": [cn_bad_frame_cc],
            "n_frames_bad_G": [cn_bad_frame_G],
            "n_frames_bad_uc": [cn_bad_frame_uc],
            "n_frames_bad_gamma_e": [cn_bad_frame_re],
            "n_frames_bad_SE": [cn_bad_frame_SE],
            "n_observations": [len(I_obs_all_sort)],
            "R_start": [np.mean(R_init_all)],
            "R_end": [np.mean(R_final_all)],
            "R_xy_start": [np.mean(R_xy_init_all)],
            "R_xy_end": [np.mean(R_xy_final_all)],
            "mean_gamma_y": [ry_mean],
            "std_gamma_y": [ry_std],
            "mean_gamma_z": [rz_mean],
            "std_gamma_z": [rz_std],
            "mean_gamma_0": [r0_mean],
            "std_gamma_0": [r0_std],
            "mean_gamma_e": [re_mean],
            "std_gamma_e": [re_std],
            "mean_voigt_nu": [voigt_nu_mean],
            "std_voigt_nu": [voigt_nu_std],
            "mean_a": [uc_mean[0]],
            "std_a": [uc_std[0]],
            "mean_b": [uc_mean[1]],
            "std_b": [uc_std[1]],
            "mean_c": [uc_mean[2]],
            "std_c": [uc_std[2]],
            "mean_alpha": [uc_mean[3]],
            "std_alpha": [uc_std[3]],
            "mean_beta": [uc_mean[4]],
            "std_beta": [uc_std[4]],
            "mean_gamma": [uc_mean[5]],
            "std_gamma": [uc_std[5]],
        }
        self.write_stat_pickle(iparams, stat_dict)

    return cn_group, group_id_list, miller_indices_all_sort, \
        miller_indices_ori_all_sort, I_obs_all_sort, sigI_obs_all_sort, \
        G_all_sort, B_all_sort, p_all_sort, rs_all_sort, \
        wavelength_all_sort, sin_sq_all_sort, SE_all_sort, uc_mean, \
        np.mean(wavelength_all), pickle_filename_all_sort, txt_out
def optimize(self, I_r_flex, observations_original, wavelength,
             crystal_init_orientation, alpha_angle, spot_pred_x_mm,
             spot_pred_y_mm, iparams, pres_in, observations_non_polar,
             detector_distance_mm):
    """Run the post-refinement micro-cycles for one frame.

    The refinement steps are ``['allparams']`` when
    ``iparams.postref.allparams.flag_on`` is set; otherwise
    ``'crystal_orientation'`` followed optionally by ``'reflecting_range'``
    and ``'unit_cell'``. Each step is minimised with ``lbfgs_handler`` and
    only accepted when the post-refinement residual (and, except for
    ``'allparams'``, also the (x,y) residual) stays below the previous
    accepted value plus the percentage tolerance given by
    ``iparams.postref.residual_threshold`` /
    ``iparams.postref.residual_threshold_xy``. Rejected steps roll the
    parameters back to the last accepted set.

    Parameters:
      I_r_flex -- reference intensities for ``observations_original``.
      observations_original -- observations of this frame.
      wavelength, detector_distance_mm -- passed through to the partiality
        and target-function helpers.
      crystal_init_orientation -- starting orientation; replaced by
        ``pres_in.crystal_orientation`` when ``pres_in`` is given.
      alpha_angle, spot_pred_x_mm, spot_pred_y_mm -- per-reflection arrays
        filtered alongside the observations.
      iparams -- parameter object.
      pres_in -- result of a previous cycle, or None for the first cycle
        (then G, B come from ``self.optimize_scalefactors``).
      observations_non_polar -- only used for the CC_iso calculation.

    Returns:
      (xopt, stats, n_refl) where ``xopt`` is
      (G, B, rotx, roty, ry, rz, r0, re, voigt_nu, a, b, c, alpha, beta,
      gamma), ``stats`` is (SE_of_the_estimate, R_sq, CC_init, CC_final,
      R_init, R_final, R_xy_init, R_xy_final, CC_iso_init, CC_iso_final) and
      ``n_refl`` is the size of the last selected reference-intensity set.
    """
    ph = partiality_handler()
    lph = lbfgs_partiality_handler()
    if iparams.postref.allparams.flag_on:
        refine_steps = ['allparams']
    else:
        refine_steps = ['crystal_orientation']
        if iparams.postref.reflecting_range.flag_on:
            refine_steps.append('reflecting_range')
        if iparams.postref.unit_cell.flag_on:
            refine_steps.append('unit_cell')

    # get miller array iso, if given.
    miller_array_iso = None

    # prepare data
    pr_d_min = iparams.postref.allparams.d_min
    pr_d_max = iparams.postref.allparams.d_max
    pr_sigma_min = iparams.postref.allparams.sigma_min
    pr_partiality_min = iparams.postref.allparams.partiality_min
    cs = observations_original.crystal_symmetry().space_group(
    ).crystal_system()

    # filter by resolution
    observations_original_sel, alpha_angle_sel, spot_pred_x_mm_sel, \
        spot_pred_y_mm_sel, I_ref_sel = self.get_filtered_data(
            'resolution', [pr_d_min, pr_d_max], observations_original,
            alpha_angle, spot_pred_x_mm, spot_pred_y_mm, I_r_flex)
    # filter by sigma
    observations_original_sel, alpha_angle_sel, spot_pred_x_mm_sel, \
        spot_pred_y_mm_sel, I_ref_sel = self.get_filtered_data(
            'sigma', [pr_sigma_min], observations_original_sel,
            alpha_angle_sel, spot_pred_x_mm_sel, spot_pred_y_mm_sel,
            I_ref_sel)

    # initialize values only in the first sub cycle and the first refine step.
    spot_radius = ph.calc_spot_radius(
        sqr(crystal_init_orientation.reciprocal_matrix()),
        observations_original_sel.indices(), wavelength)
    if pres_in is None:
        ry, rz, r0, re, voigt_nu, rotx, roty = (
            0, 0, spot_radius, iparams.gamma_e, iparams.voigt_nu, 0.0, 0.0)
        # apply constrain on the unit cell using crystal system
        uc_scale_inp = lph.prep_input(
            observations_original.unit_cell().parameters(), cs)
        uc_scale_constrained = lph.prep_output(uc_scale_inp, cs)
        a, b, c, alpha, beta, gamma = uc_scale_constrained
        const_params_scale = (rotx, roty, ry, rz, r0, re, voigt_nu,
                              a, b, c, alpha, beta, gamma)
        xopt_scalefactors, stats = self.optimize_scalefactors(
            I_r_flex, observations_original, wavelength,
            crystal_init_orientation, alpha_angle, spot_pred_x_mm,
            spot_pred_y_mm, iparams, pres_in, observations_non_polar,
            detector_distance_mm, const_params_scale)
        G, B = xopt_scalefactors
    else:
        G, B, ry, rz, r0, re, voigt_nu, rotx, roty = (
            pres_in.G, pres_in.B, pres_in.ry, pres_in.rz, pres_in.r0,
            pres_in.re, pres_in.voigt_nu, 0.0, 0.0)
        a, b, c, alpha, beta, gamma = pres_in.unit_cell.parameters()
        crystal_init_orientation = pres_in.crystal_orientation

    # filter by partiality
    two_theta = observations_original_sel.two_theta(
        wavelength=wavelength).data()
    uc = unit_cell((a, b, c, alpha, beta, gamma))
    partiality_init, delta_xy_init, rs_init, dummy = \
        ph.calc_partiality_anisotropy_set(
            uc, rotx, roty, observations_original_sel.indices(), ry, rz, r0,
            re, voigt_nu, two_theta, alpha_angle_sel, wavelength,
            crystal_init_orientation, spot_pred_x_mm_sel,
            spot_pred_y_mm_sel, detector_distance_mm,
            iparams.partiality_model, iparams.flag_beam_divergence)
    observations_original_sel, alpha_angle_sel, spot_pred_x_mm_sel, \
        spot_pred_y_mm_sel, I_ref_sel = self.get_filtered_data(
            'partiality', [pr_partiality_min], observations_original_sel,
            alpha_angle_sel, spot_pred_x_mm_sel, spot_pred_y_mm_sel,
            I_ref_sel, partiality_in=partiality_init)
    I_r_true = I_ref_sel[:]

    # calculate initial residual_xy error
    const_params_uc = (G, B, rotx, roty, ry, rz, r0, re, voigt_nu)
    xinp_uc = lph.prep_input((a, b, c, alpha, beta, gamma), cs)
    args_uc = (I_r_true, observations_original_sel, wavelength,
               alpha_angle_sel, crystal_init_orientation,
               spot_pred_x_mm_sel, spot_pred_y_mm_sel, detector_distance_mm,
               'unit_cell', const_params_uc, B, miller_array_iso, iparams)
    uc_params_err = lph.func(xinp_uc, args_uc)
    init_residual_xy_err = flex.sum(uc_params_err**2)

    # calculate initial residual_pr error
    const_params_all = (G, B)
    xinp_all = flex.double([rotx, roty, ry, rz, r0, re, voigt_nu])
    xinp_all.extend(lph.prep_input((a, b, c, alpha, beta, gamma), cs))
    args_all = (I_r_true, observations_original_sel, wavelength,
                alpha_angle_sel, crystal_init_orientation,
                spot_pred_x_mm_sel, spot_pred_y_mm_sel, detector_distance_mm,
                'allparams', const_params_all, B, miller_array_iso, iparams)
    all_params_err = lph.func(xinp_all, args_all)
    init_residual_err = flex.sum(all_params_err**2)

    # keep in list: the last entry of each list is the last ACCEPTED state
    t_pr_list = [init_residual_err]
    t_xy_list = [init_residual_xy_err]
    refined_params_hist = [(G, B, rotx, roty, ry, rz, r0, re, voigt_nu,
                            a, b, c, alpha, beta, gamma)]
    # Per-microcycle log text; it is accumulated but not returned.
    txt_out = ''

    for i_sub_cycle in range(iparams.n_postref_sub_cycle):
        for refine_mode in refine_steps:
            # prepare data
            init_params = (G, B, rotx, roty, ry, rz, r0, re, voigt_nu,
                           a, b, c, alpha, beta, gamma)
            observations_original_sel, alpha_angle_sel, \
                spot_pred_x_mm_sel, spot_pred_y_mm_sel, I_ref_sel = \
                self.prepare_data_microcycle(
                    refine_mode, iparams, observations_original,
                    alpha_angle, spot_pred_x_mm, spot_pred_y_mm, I_r_flex,
                    init_params, crystal_init_orientation, wavelength,
                    detector_distance_mm)
            I_r_true = I_ref_sel[:]

            if refine_mode == 'crystal_orientation':
                xinp = flex.double([rotx, roty])
                const_params = (G, B, ry, rz, r0, re, voigt_nu,
                                a, b, c, alpha, beta, gamma)
            elif refine_mode == 'reflecting_range':
                xinp = flex.double([ry, rz, r0, re, voigt_nu])
                const_params = (G, B, rotx, roty, a, b, c, alpha, beta, gamma)
            elif refine_mode == 'unit_cell':
                xinp = lph.prep_input((a, b, c, alpha, beta, gamma), cs)
                const_params = (G, B, rotx, roty, ry, rz, r0, re, voigt_nu)
            elif refine_mode == 'allparams':
                xinp = flex.double([rotx, roty, ry, rz, r0, re, voigt_nu])
                xinp.extend(
                    lph.prep_input((a, b, c, alpha, beta, gamma), cs))
                const_params = (G, B)

            args = (I_r_true, observations_original_sel, wavelength,
                    alpha_angle_sel, crystal_init_orientation,
                    spot_pred_x_mm_sel, spot_pred_y_mm_sel,
                    detector_distance_mm, refine_mode, const_params, B,
                    miller_array_iso, iparams)
            lh = lbfgs_handler(current_x=xinp, args=args)
            xopt = flex.double(list(lh.x))

            if refine_mode in ('crystal_orientation', 'reflecting_range',
                               'allparams'):
                current_residual_err = lh.f
                # calculate residual_xy_error (for refine_mode = SF, CO, RR,
                # and all params)
                if refine_mode == 'crystal_orientation':
                    rotx, roty = xopt
                elif refine_mode == 'reflecting_range':
                    ry, rz, r0, re, voigt_nu = xopt
                else:  # 'allparams'
                    rotx, roty, ry, rz, r0, re, voigt_nu = xopt[:7]
                    a, b, c, alpha, beta, gamma = lph.prep_output(
                        xopt[7:], cs)
                const_params_uc = (G, B, rotx, roty, ry, rz, r0, re,
                                   voigt_nu)
                xinp_uc = lph.prep_input((a, b, c, alpha, beta, gamma), cs)
                args_uc = (I_r_true, observations_original_sel, wavelength,
                           alpha_angle_sel, crystal_init_orientation,
                           spot_pred_x_mm_sel, spot_pred_y_mm_sel,
                           detector_distance_mm, 'unit_cell',
                           const_params_uc, B, miller_array_iso, iparams)
                uc_params_err = lph.func(xinp_uc, args_uc)
                current_residual_xy_err = flex.sum(uc_params_err**2)
            elif refine_mode == 'unit_cell':
                current_residual_xy_err = lh.f
                a, b, c, alpha, beta, gamma = lph.prep_output(xopt, cs)
                # check the unit-cell with the reference intensity
                xinp_check = flex.double(
                    [rotx, roty, ry, rz, r0, re, voigt_nu])
                xinp_check.extend(
                    lph.prep_input((a, b, c, alpha, beta, gamma), cs))
                const_params_all = (G, B)
                args_all = (I_r_true, observations_original_sel, wavelength,
                            alpha_angle_sel, crystal_init_orientation,
                            spot_pred_x_mm_sel, spot_pred_y_mm_sel,
                            detector_distance_mm, 'allparams',
                            const_params_all, B, miller_array_iso, iparams)
                # Evaluate with the vector built from the refined cell.
                # Previously the stale pre-loop vector ``xinp_all`` was
                # passed here, so the refined cell never entered the check.
                all_params_err = lph.func(xinp_check, args_all)
                current_residual_err = flex.sum(all_params_err**2)

            # Accept the step only if the residual(s) did not grow beyond
            # the configured percentage of the last accepted value.
            pr_limit = t_pr_list[-1] + (
                t_pr_list[-1] * iparams.postref.residual_threshold / 100)
            flag_success = False
            if refine_mode == 'allparams':
                # if allparams refinement, only check the post-refine target
                # function
                flag_success = current_residual_err < pr_limit
            elif current_residual_err < pr_limit:
                xy_limit = t_xy_list[-1] + (
                    t_xy_list[-1] *
                    iparams.postref.residual_threshold_xy / 100)
                flag_success = current_residual_xy_err < xy_limit

            if flag_success:
                t_pr_list.append(current_residual_err)
                t_xy_list.append(current_residual_xy_err)
                refined_params_hist.append(
                    (G, B, rotx, roty, ry, rz, r0, re, voigt_nu,
                     a, b, c, alpha, beta, gamma))
            else:
                # Roll back to the last accepted parameter set.
                G, B, rotx, roty, ry, rz, r0, re, voigt_nu, \
                    a, b, c, alpha, beta, gamma = refined_params_hist[-1]

            tmp_txt_out = refine_mode + ' %3.0f %6.4f %6.4f %6.4f %6.4f %10.8f %10.8f %10.8f %10.8f %10.8f %6.3f %6.3f %.4g %6.3f\n' % (
                i_sub_cycle, G, B, rotx * 180 / math.pi,
                roty * 180 / math.pi, ry, rz, r0, re, voigt_nu, a, c,
                t_pr_list[-1], t_xy_list[-1])
            txt_out += tmp_txt_out

    # apply the refined parameters on the full (original) reflection set
    two_theta = observations_original.two_theta(
        wavelength=wavelength).data()
    sin_theta_over_lambda_sq = observations_original.two_theta(
        wavelength=wavelength).sin_theta_over_lambda_sq().data()
    if pres_in is None:
        partiality_init, delta_xy_init, rs_init, rh_init = \
            ph.calc_partiality_anisotropy_set(
                observations_original.unit_cell(), 0.0, 0.0,
                observations_original.indices(), 0, 0, spot_radius,
                iparams.gamma_e, iparams.voigt_nu, two_theta, alpha_angle,
                wavelength, crystal_init_orientation, spot_pred_x_mm,
                spot_pred_y_mm, detector_distance_mm,
                iparams.partiality_model, iparams.flag_beam_divergence)
        I_o_init = ph.calc_full_refl(
            observations_original.data(), sin_theta_over_lambda_sq, 1, 0,
            partiality_init, rs_init)
    else:
        partiality_init, delta_xy_init, rs_init, rh_init = \
            ph.calc_partiality_anisotropy_set(
                pres_in.unit_cell, 0.0, 0.0,
                observations_original.indices(), pres_in.ry, pres_in.rz,
                pres_in.r0, pres_in.re, pres_in.voigt_nu, two_theta,
                alpha_angle, wavelength, crystal_init_orientation,
                spot_pred_x_mm, spot_pred_y_mm, detector_distance_mm,
                iparams.partiality_model, iparams.flag_beam_divergence)
        I_o_init = ph.calc_full_refl(
            observations_original.data(), sin_theta_over_lambda_sq,
            pres_in.G, pres_in.B, partiality_init, rs_init)

    partiality_fin, delta_xy_fin, rs_fin, rh_fin = \
        ph.calc_partiality_anisotropy_set(
            unit_cell((a, b, c, alpha, beta, gamma)), rotx, roty,
            observations_original.indices(), ry, rz, r0, re, voigt_nu,
            two_theta, alpha_angle, wavelength, crystal_init_orientation,
            spot_pred_x_mm, spot_pred_y_mm, detector_distance_mm,
            iparams.partiality_model, iparams.flag_beam_divergence)
    I_o_fin = ph.calc_full_refl(
        observations_original.data(), sin_theta_over_lambda_sq, G, B,
        partiality_fin, rs_fin)

    SE_of_the_estimate = standard_error_of_the_estimate(
        I_r_flex, I_o_fin, 13)
    R_sq = coefficient_of_determination(I_r_flex, I_o_fin) * 100
    CC_init = flex.linear_correlation(I_r_flex, I_o_init).coefficient()
    CC_final = flex.linear_correlation(I_r_flex, I_o_fin).coefficient()
    err_init = (I_r_flex - I_o_init) / observations_original.sigmas()
    R_init = math.sqrt(flex.sum(err_init**2))
    err_final = (I_r_flex - I_o_fin) / observations_original.sigmas()
    R_final = math.sqrt(flex.sum(err_final**2))
    R_xy_init = math.sqrt(flex.sum(delta_xy_init**2))
    R_xy_final = math.sqrt(flex.sum(delta_xy_fin**2))

    # Discard the refinement when it made the overall residual worse or
    # when gamma_e grew beyond three times its starting value.
    # NOTE(review): voigt_nu is not reset here, unlike the other parameters
    # -- confirm whether that is intended.
    if R_init < R_final or re > (iparams.gamma_e * 3):
        CC_final = CC_init
        R_final = R_init
        R_xy_final = R_xy_init
        if pres_in is None:
            G, B, r0, ry, rz, re, rotx, roty = (
                1.0, 0.0, spot_radius, 0.0, 0.0, iparams.gamma_e, 0.0, 0.0)
            a, b, c, alpha, beta, gamma = \
                observations_original.unit_cell().parameters()
        else:
            G, B, r0, ry, rz, re, rotx, roty = (
                pres_in.G, pres_in.B, pres_in.r0, pres_in.ry, pres_in.rz,
                pres_in.re, 0.0, 0.0)
            a, b, c, alpha, beta, gamma = pres_in.unit_cell.parameters()
            crystal_init_orientation = pres_in.crystal_orientation

    # calculate CCiso if hklisoin is given
    # (miller_array_iso is set to None above, so this branch is currently
    # never taken and both values stay 0.)
    CC_iso_init, CC_iso_final = (0, 0)
    if iparams.hklisoin is not None:
        if miller_array_iso is not None:
            from cctbx import miller
            matches = miller.match_multi_indices(
                miller_indices_unique=miller_array_iso.indices(),
                miller_indices=observations_non_polar.indices())
            I_iso_match = flex.double(
                [miller_array_iso.data()[pair[0]]
                 for pair in matches.pairs()])
            I_o_init_match = flex.double(
                [I_o_init[pair[1]] for pair in matches.pairs()])
            I_o_fin_match = flex.double(
                [I_o_fin[pair[1]] for pair in matches.pairs()])
            CC_iso_init = flex.linear_correlation(
                I_iso_match, I_o_init_match).coefficient()
            CC_iso_final = flex.linear_correlation(
                I_iso_match, I_o_fin_match).coefficient()

    xopt = (G, B, rotx, roty, ry, rz, r0, re, voigt_nu,
            a, b, c, alpha, beta, gamma)
    return xopt, (SE_of_the_estimate, R_sq, CC_init, CC_final, R_init,
                  R_final, R_xy_init, R_xy_final, CC_iso_init,
                  CC_iso_final), len(I_ref_sel)
def run(args):
    """Convert, optionally de-overlap, rebatch and scale a set of XDS files.

    ``args`` holds phil parameter assignments plus the input file names
    (whatever the phil interpreter does not consume). ``space_group`` and
    ``unit_cell`` parameters are required. Steps:

    1. Assign each file a wedge id (running counter), a wedge number (taken
       from a ``sweep_*`` path component, else a running counter) and a
       lattice id (from a ``lattice_*`` path component, default 1).
    2. If ``params.overlaps.find_overlaps``: for wedges with more than one
       file, run ``dials.import_xds`` and ``dials.find_overlaps`` and record
       the overlapping Miller indices per wedge id.
    3. Run ``pointless`` on every file to write ``integrate_hkl_NNN.mtz``;
       delete recorded overlapping reflections from that MTZ file.
    4. Optionally run ``brehm_diederichs`` to resolve indexing ambiguity.
    5. Offset batch numbers by ``1000 * wedge_number`` into
       ``rebatch-N.mtz``, combine with ``pointless`` and scale with
       ``aimless``.

    Output of the external programs goes to ``pointless_NNN.log``,
    ``pointless_all.log`` and ``aimless.log`` in the working directory.
    """
    cmd_line = command_line.argument_interpreter(
        master_params=master_phil_scope)
    working_phil, args = cmd_line.process_and_fetch(
        args=args, custom_processor="collect_remaining")
    working_phil.show()
    params = working_phil.extract()

    files = args

    from cctbx import crystal
    from iotbx.reflection_file_reader import any_reflection_file

    file_name_dict = {}

    wedge_id = -1
    wedge_number = -1

    wedge_number_to_wedge_id = {}

    assert params.space_group is not None
    assert params.unit_cell is not None
    space_group = params.space_group.group()
    unit_cell = params.unit_cell
    # Only referenced by the commented-out similarity checks below.
    crystal_symmetry = crystal.symmetry(
        unit_cell=unit_cell, space_group=space_group)

    for file_name in files:
        file_name = os.path.abspath(file_name)
        print(file_name)
        path_parts = file_name.split(os.path.sep)

        wedge_number_ = None
        for s in path_parts:
            if s.startswith("sweep_"):
                # last three characters of the component (extension removed)
                wedge_number_ = int(os.path.splitext(s)[0][-3:])
                print("wedge_number:", wedge_number_)
                break
        if wedge_number_ is not None:
            wedge_number = wedge_number_
        else:
            wedge_number += 1

        lattice_id = 1
        for s in path_parts:
            if s.startswith("lattice_"):
                lattice_id = int(os.path.splitext(s)[0].split("_")[-1])
                print("lattice_id:", lattice_id)
                break

        wedge_id += 1
        print(
            "wedge_id: %i, wedge_number: %i, lattice_id: %i"
            % (wedge_id, wedge_number, lattice_id)
        )
        wedge_number_to_wedge_id.setdefault(wedge_number, []).append(
            wedge_id)

        # if not intensities.crystal_symmetry().is_similar_symmetry(
        #   crystal_symmetry, relative_length_tolerance=0.1):
        #   continue

        file_name_dict[wedge_id] = file_name

    if params.overlaps.find_overlaps:
        # figure out the overlapping reflections and save the miller indices
        # for later on
        reject_hkl = {}

        def run_find_overlaps(args):
            # ``args`` is one (wedge_number, [wedge_id, ...]) item.
            wedge_n, wedge_ids = args
            result_dict = {}

            print("Wedge", wedge_n)
            # A wedge with a single file has nothing to overlap with.
            if len(wedge_ids) > 1:
                for wedge_id in wedge_ids:
                    args = [
                        "dials.import_xds",
                        os.path.split(file_name_dict[wedge_id])[0],
                        "--output='experiments_%i.json'" % wedge_id,
                    ]
                    cmd = " ".join(args)
                    print(cmd)
                    result = easy_run.fully_buffered(cmd).raise_if_errors()
                    result.show_stdout()
                    result.show_stderr()

                    args = [
                        "dials.import_xds",
                        file_name_dict[wedge_id],
                        "experiments_%i.json" % wedge_id,
                        "--input=reflections",
                        "--output='integrate_hkl_%i.pickle'" % wedge_id,
                    ]
                    cmd = " ".join(args)
                    print(cmd)
                    result = easy_run.fully_buffered(cmd).raise_if_errors()
                    result.show_stdout()
                    result.show_stderr()

                from dials.command_line import find_overlaps

                args = ["experiments_%i.json" % wedge_id
                        for wedge_id in wedge_ids]
                args.extend(
                    ["integrate_hkl_%i.pickle" % wedge_id
                     for wedge_id in wedge_ids]
                )
                # args.append("nproc=%s" %params.nproc)
                args.append(
                    "max_overlap_fraction=%f"
                    % params.overlaps.max_overlap_fraction
                )
                args.append(
                    "max_overlap_pixels=%f"
                    % params.overlaps.max_overlap_pixels
                )
                args.append("n_sigma=%f" % params.overlaps.n_sigma)
                args.append("save_overlaps=False")
                overlaps = find_overlaps.run(args)
                miller_indices = \
                    overlaps.overlapping_reflections["miller_index"]
                # One selection per input, in the order the files were given.
                overlapping = [
                    miller_indices.select(
                        overlaps.overlapping_reflections["id"] == i_lattice
                    )
                    for i_lattice in range(len(wedge_ids))
                ]
                for wedge_id, wedge_overlaps in zip(wedge_ids, overlapping):
                    result_dict[wedge_id] = wedge_overlaps
            return result_dict

        from libtbx import easy_mp

        results = easy_mp.parallel_map(
            func=run_find_overlaps,
            iterable=wedge_number_to_wedge_id.items(),
            processes=params.nproc,
            preserve_order=True,
            asynchronous=False,
            preserve_exception_message=True,
        )
        for result in results:
            reject_hkl.update(result)

    # ``.values()`` works on both Python 2 and 3 (``iteritems`` does not).
    for wedge_ids in wedge_number_to_wedge_id.values():
        for wedge in wedge_ids:
            cmd = """\
pointless -copy xdsin %s hklout integrate_hkl_%03.f.mtz << EOF
SPACEGROUP %s
EOF
""" % (
                file_name_dict[wedge],
                wedge,
                space_group.type().lookup_symbol(),
            )
            # Text mode: the log receives str output from print();
            # the context manager closes the file.
            with open("pointless_%03.f.log" % wedge, "w") as log:
                print(cmd, file=log)
                result = easy_run.fully_buffered(command=cmd)
                result.show_stdout(out=log)
                result.show_stderr(out=log)

            if params.overlaps.find_overlaps:
                from cctbx import miller
                from iotbx import mtz

                m = mtz.object(file_name="integrate_hkl_%03.f.mtz" % wedge)
                orig_indices = m.extract_original_index_miller_indices()
                overlaps = reject_hkl.get(wedge)
                # ``overlaps`` is None for wedges skipped by
                # run_find_overlaps, so everything using it stays guarded.
                if overlaps is not None and len(overlaps) > 0:
                    matches = miller.match_multi_indices(
                        overlaps, orig_indices)
                    before = m.n_reflections()
                    print("before: %i reflections" % before)
                    # Delete from the highest index down so the remaining
                    # indices stay valid.
                    for i_ref in sorted(
                        matches.pair_selection(1).iselection(), reverse=True
                    ):
                        m.delete_reflection(i_ref)
                    after = m.n_reflections()
                    print("after: %i reflections" % after)
                    m.add_history(
                        "Removed %i overlapping reflections" % len(overlaps))
                    m.write("integrate_hkl_%03.f.mtz" % wedge)

    g = glob.glob("integrate_hkl_*.mtz")

    if params.resolve_indexing_ambiguity:
        from cctbx.command_line import brehm_diederichs

        args = g
        args.append("asymmetric=1")
        args.append("save_plot=True")
        args.append("show_plot=False")
        brehm_diederichs.run(args)
        g = glob.glob("integrate_hkl_*_reindexed.mtz")

    for file_name in g:
        # Wedge id: last three characters of the base name once the
        # extension and any "_reindexed" suffix are removed.
        wedge_number = int(
            os.path.splitext(os.path.basename(file_name))[0]
            .replace("_reindexed", "")[-3:]
        )
        # print wedge_number, wedge_number

        result = any_reflection_file(file_name)
        mtz_object = result.file_content()

        # if not mtz_object.crystals()[0].crystal_symmetry().is_similar_symmetry(
        #   crystal_symmetry, relative_length_tolerance=0.1):
        #   continue

        # Shift both the batch headers and the BATCH column by the same
        # per-wedge offset.
        for batch in mtz_object.batches():
            batch.set_num(batch.num() + 1000 * wedge_number)
        batches = mtz_object.get_column("BATCH")
        batches.set_values(batches.extract_values() + 1000 * wedge_number)
        mtz_object.write("rebatch-%i.mtz" % (wedge_number))

    g = glob.glob("rebatch-*.mtz")

    cmd = """\
pointless -copy hklin %s hklout pointless.mtz << EOF
ALLOW OUTOFSEQUENCEFILES
TOLERANCE 4
SPACEGROUP %s
EOF
""" % (
        " ".join(g),
        space_group.type().lookup_symbol(),
    )

    with open("pointless_all.log", "w") as log:
        print(cmd, file=log)
        result = easy_run.fully_buffered(command=cmd)
        result.show_stdout(out=log)
        result.show_stderr(out=log)

    cmd = """\
aimless pointless.mtz << EOF
OUTPUT UNMERGED TOGETHER
%s
EOF
""" % (
        "\n".join(params.aimless.command)
    )

    with open("aimless.log", "w") as log:
        print(cmd, file=log)
        result = easy_run.fully_buffered(command=cmd)
        result.show_stdout(out=log)
        result.show_stderr(out=log)
def format_miller_arrays(self, iparams):
    """Read an MTZ file and format it into equal-length Miller arrays.

    The returned ``miller_arrays_out`` object is laid out as
      index[0] --> FP, SIGFP
      index[1] --> PHIB
      index[2] --> FOM
      index[3] --> HLA, HLB, HLC, HLD
      index[4] --> optional PHIC

    Parameters read from ``iparams``: ``data`` (input reflection file),
    ``column_names`` (comma-separated labels; entries 0, 2, 3 and 4 are
    compared with the first label of each array for FP, PHIB, FOM and HL),
    ``hklrefin`` / ``column_phic`` (optional benchmark phases),
    ``flag_apply_b_factor`` / ``wavelength`` (optional Wilson weighting of
    the sort key), ``percent_f_squared`` and ``n_stacks`` (stack building).

    Returns a tuple ``(miller_arrays_out, fp_sort_index_stacks, txt_out)``
    where ``fp_sort_index_stacks`` is a list of lists of reflection positions
    (strongest sort key first) and ``txt_out`` is a text table describing
    each stack.

    Calls ``sys.exit()`` when a required column cannot be located.
    """
    # Read in the reflection file.
    reflection_file = reflection_file_reader.any_reflection_file(iparams.data)
    col_name = iparams.column_names.split(',')
    miller_arrays = reflection_file.as_miller_arrays()
    crystal_symmetry = crystal.symmetry(
        unit_cell=miller_arrays[0].unit_cell(),
        space_group=miller_arrays[0].space_group())

    # Grab all required columns (only the first label of each array is
    # compared against the requested column names).
    fp_found = phib_found = fom_found = hl_found = False
    ind_miller_array_fp = 0
    ind_miller_array_phib = 0
    ind_miller_array_fom = 0
    ind_miller_array_hl = 0
    for i, miller_array in enumerate(miller_arrays):
        labels = miller_array.info().label_string().split(',')
        if labels[0] == col_name[0]:
            # FP, SIGFP
            flex_fp_all = miller_array.data()
            flex_sigmas_all = miller_array.sigmas()
            fp_found = True
            ind_miller_array_fp = i
        elif labels[0] == col_name[2]:
            # PHIB
            flex_phib_all = miller_array.data()
            phib_found = True
            ind_miller_array_phib = i
        elif labels[0] == col_name[3]:
            # FOM
            flex_fom_all = miller_array.data()
            fom_found = True
            ind_miller_array_fom = i
        elif labels[0] == col_name[4]:
            # HLA, HLB, HLC, HLD
            flex_hl_all = miller_array.data()
            hl_found = True
            ind_miller_array_hl = i

    if hl_found and not phib_found:
        # Calculate PHIB and FOM from the Hendrickson-Lattman coefficients.
        miller_array_phi_fom = miller_arrays[
            ind_miller_array_hl].phase_integrals()
        flex_phib_all = miller_array_phi_fom.phases(deg=True).data()
        flex_fom_all = miller_array_phi_fom.amplitudes().data()
        phib_found = True
        fom_found = True
        # The derived data are ordered like the HL array, so positions must
        # be looked up in that array's indices (previously these stayed at
        # their defaults and could address a differently ordered array).
        ind_miller_array_phib = ind_miller_array_hl
        ind_miller_array_fom = ind_miller_array_hl

    if not (fp_found and phib_found and fom_found and hl_found):
        print("couldn't find all required columns")
        sys.exit()

    miller_indices_sel = miller_arrays[ind_miller_array_fp].indices()
    print('No. reflections for read-in miller arrays - indices:%6.0f fp:%6.0f phib:%6.0f fom:%6.0f HL:%6.0f)' % (
        len(miller_indices_sel), len(flex_fp_all), len(flex_phib_all),
        len(flex_fom_all), len(flex_hl_all)))

    miller_indices = flex.miller_index()
    flex_fp = flex.double()
    flex_sigmas = flex.double()
    flex_phib = flex.double()
    flex_fom = flex.double()
    flex_hl = flex.hendrickson_lattman()

    # Hoisted out of the loop: these do not change per reflection.
    indices_fp = miller_arrays[ind_miller_array_fp].indices()
    indices_phib = miller_arrays[ind_miller_array_phib].indices()
    indices_fom = miller_arrays[ind_miller_array_fom].indices()
    indices_hl = miller_arrays[ind_miller_array_hl].indices()

    def first_match(unique_index, indices):
        """Return the position of the first pair matched in *indices*, or None."""
        pairs = miller.match_multi_indices(
            miller_indices_unique=unique_index,
            miller_indices=indices).pairs()
        if len(pairs) > 0:
            return pairs[0][1]
        return None

    # Format all miller arrays to the same length: keep a reflection only
    # when it is present in every one of the four source arrays.
    for miller_index in miller_indices_sel:
        unique_index = flex.miller_index([miller_index])
        i_fp = first_match(unique_index, indices_fp)
        i_phib = first_match(unique_index, indices_phib)
        i_fom = first_match(unique_index, indices_fom)
        i_hl = first_match(unique_index, indices_hl)
        if None in (i_fp, i_phib, i_fom, i_hl):
            continue
        miller_indices.append(miller_index)
        flex_fp.append(flex_fp_all[i_fp])
        flex_sigmas.append(flex_sigmas_all[i_fp])
        flex_phib.append(flex_phib_all[i_phib])
        flex_fom.append(flex_fom_all[i_fom])
        flex_hl.append(flex_hl_all[i_hl])

    print('No. reflections after format - indices:%6.0f fp:%6.0f phib:%6.0f fom:%6.0f HL:%6.0f)' % (
        len(miller_indices), len(flex_fp), len(flex_phib), len(flex_fom),
        len(flex_hl)))

    # Split the HL coefficients into four separate columns.
    flex_hla = flex.double()
    flex_hlb = flex.double()
    flex_hlc = flex.double()
    flex_hld = flex.double()
    for i in range(len(flex_hl)):
        data_hl_row = flex_hl[i]
        flex_hla.append(data_hl_row[0])
        flex_hlb.append(data_hl_row[1])
        flex_hlc.append(data_hl_row[2])
        flex_hld.append(data_hl_row[3])

    # Read benchmark MTZ (PHICalc) for MPE calculation; PHIC defaults to 0.
    flex_phic = flex.double([0] * len(flex_fp))
    if iparams.hklrefin is not None:
        reflection_file = reflection_file_reader.any_reflection_file(
            iparams.hklrefin)
        miller_arrays_bench = reflection_file.as_miller_arrays()
        flex_phic_raw = None
        miller_indices_phic = None
        for bench_array in miller_arrays_bench:
            labels = bench_array.info().label_string().split(',')
            # Only look at the first label.
            if labels[0] == iparams.column_phic:
                if bench_array.is_complex_array():
                    flex_phic_raw = bench_array.phases(deg=True).data()
                else:
                    flex_phic_raw = bench_array.data()
                miller_indices_phic = bench_array.indices()
        # Guard on the column actually read from the benchmark file; the
        # zero-filled default above is never None, so testing it would let a
        # missing PHIC column fall through and fail on None.
        if flex_phic_raw is not None:
            matches = miller.match_multi_indices(
                miller_indices_unique=miller_indices,
                miller_indices=miller_indices_phic)
            flex_phic = flex.double(
                [flex_phic_raw[pair[1]] for pair in matches.pairs()])

    # Format miller_arrays_out.
    miller_set = miller.set(crystal_symmetry=crystal_symmetry,
                            indices=miller_indices,
                            anomalous_flag=False)
    miller_array_out = miller_set.array(
        data=flex_fp,
        sigmas=flex_sigmas).set_observation_type_xray_amplitude()

    # Optionally weight the sort key by the Wilson B-factor.
    flex_fp_for_sort = flex_fp[:]
    if iparams.flag_apply_b_factor:
        try:
            # Get the Wilson scaling from xtriage.
            from mmtbx.scaling import xtriage
            from libtbx.utils import null_out
            xtriage_args = [iparams.data, "", "", "log=tst_xtriage_1.log"]
            result = xtriage.run(args=xtriage_args, out=null_out())
            ws = result.wilson_scaling
            print('Wilson K=%6.2f B=%6.2f' % (ws.iso_p_scale, ws.iso_b_wilson))
            sin_theta_over_lambda_sq = miller_array_out.two_theta(
                wavelength=iparams.wavelength).sin_theta_over_lambda_sq().data()
            wilson_expect = flex.exp(
                -2 * ws.iso_b_wilson * sin_theta_over_lambda_sq)
            flex_fp_for_sort = wilson_expect * flex_fp
        except Exception:
            # Deliberate best effort: fall back to the unweighted amplitudes.
            print('Error calculating Wilson scale factors. Continue without applying B-factor.')

    mtz_dataset = miller_array_out.as_mtz_dataset(column_root_label="FP")
    for data, lbl, typ in [(flex_phib, "PHIB", "P"), (flex_fom, "FOMB", "W"),
                           (flex_hla, "HLA", "A"), (flex_hlb, "HLB", "A"),
                           (flex_hlc, "HLC", "A"), (flex_hld, "HLD", "A"),
                           (flex_phic, "PHIC", "P")]:
        mtz_dataset.add_miller_array(miller_array_out.array(data=data),
                                     column_root_label=lbl,
                                     column_types=typ)
    miller_arrays_out = mtz_dataset.mtz_object().as_miller_arrays()

    # Positions of the selected reflections sorted by the (optionally
    # weighted) FP in descending order. Sort ascending, then reverse, so the
    # relative order of ties is the same as before.
    import operator
    fp_sort_index = [
        i for (i, j) in sorted(enumerate(flex_fp_for_sort),
                               key=operator.itemgetter(1))
    ]
    fp_sort_index.reverse()

    # Build stacks, each holding at least percent_f_squared of sum(FP^2).
    flex_fp_squared = flex_fp**2
    total_f_squared = np.sum(flex_fp_squared)
    f_squared_per_stack = (iparams.percent_f_squared * total_f_squared) / 100
    fp_sort_index_stacks = []
    sum_fp_now, i_start = (0, 0)
    for i, i_refl in enumerate(fp_sort_index):
        # Running sum of the current stack (was recomputed from scratch on
        # every iteration, which is quadratic in the number of reflections).
        sum_fp_now += flex_fp_squared[i_refl]
        if sum_fp_now >= f_squared_per_stack:
            fp_sort_index_stacks.append(fp_sort_index[i_start:i + 1])
            i_start = i + 1
            sum_fp_now = 0
            if len(fp_sort_index_stacks) == iparams.n_stacks:
                break

    txt_out = 'stack_no sum(f_squared) %total n_refl\n'
    for i, stack in enumerate(fp_sort_index_stacks):
        sum_fp = np.sum([flex_fp_squared[ii_sel] for ii_sel in stack])
        txt_out += '%6.0f %14.2f %8.2f %6.0f\n' % (
            i + 1, sum_fp, (sum_fp / total_f_squared) * 100, len(stack))
    return miller_arrays_out, fp_sort_index_stacks, txt_out
def optimize(self, I_r_flex, observations_original, wavelength,
             crystal_init_orientation, alpha_angle, spot_pred_x_mm,
             spot_pred_y_mm, iparams, pres_in, observations_non_polar,
             detector_distance_mm):
    """Post-refine scale, orientation, reflecting-range and unit-cell parameters.

    The refinement runs ``iparams.n_postref_sub_cycle`` sub-cycles; each
    sub-cycle executes the enabled refine steps ('allparams', or
    'crystal_orientation' optionally followed by 'reflecting_range' and
    'unit_cell'). A step is accepted only when its residual(s) stay within
    the configured thresholds of the last accepted values; otherwise the
    parameters are rolled back to the last accepted set.

    ``pres_in`` is the result of a previous cycle, or None on the first
    cycle, in which case starting values come from ``iparams`` and from
    ``self.optimize_scalefactors``.

    Returns ``(xopt, stats, n_refl)`` where
      xopt  = (G, B, rotx, roty, ry, rz, r0, re, voigt_nu,
               a, b, c, alpha, beta, gamma)
      stats = (SE_of_the_estimate, R_sq, CC_init, CC_final, R_init, R_final,
               R_xy_init, R_xy_final, CC_iso_init, CC_iso_final)
      n_refl = number of reflections in the last refinement selection.
    """
    ph = partiality_handler()
    lph = lbfgs_partiality_handler()
    if iparams.postref.allparams.flag_on:
        refine_steps = ['allparams']
    else:
        refine_steps = ['crystal_orientation']
        if iparams.postref.reflecting_range.flag_on:
            refine_steps.append('reflecting_range')
        if iparams.postref.unit_cell.flag_on:
            refine_steps.append('unit_cell')

    # Miller array iso, if given (never populated in this method).
    miller_array_iso = None

    # Prepare data.
    pr_d_min = iparams.postref.allparams.d_min
    pr_d_max = iparams.postref.allparams.d_max
    pr_sigma_min = iparams.postref.allparams.sigma_min
    pr_partiality_min = iparams.postref.allparams.partiality_min
    cs = observations_original.crystal_symmetry().space_group().crystal_system()

    # Filter by resolution.
    observations_original_sel, alpha_angle_sel, spot_pred_x_mm_sel, \
        spot_pred_y_mm_sel, I_ref_sel = self.get_filtered_data(
            'resolution', [pr_d_min, pr_d_max], observations_original,
            alpha_angle, spot_pred_x_mm, spot_pred_y_mm, I_r_flex)
    # Filter by sigma.
    observations_original_sel, alpha_angle_sel, spot_pred_x_mm_sel, \
        spot_pred_y_mm_sel, I_ref_sel = self.get_filtered_data(
            'sigma', [pr_sigma_min], observations_original_sel,
            alpha_angle_sel, spot_pred_x_mm_sel, spot_pred_y_mm_sel,
            I_ref_sel)

    # Initialize values only in the first sub cycle and the first refine step.
    spot_radius = ph.calc_spot_radius(
        sqr(crystal_init_orientation.reciprocal_matrix()),
        observations_original_sel.indices(), wavelength)
    if pres_in is None:
        ry, rz, r0, re, voigt_nu, rotx, roty = (
            0, 0, spot_radius, iparams.gamma_e, iparams.voigt_nu, 0.0, 0.0)
        # Apply the crystal-system constraint on the unit cell.
        uc_scale_inp = lph.prep_input(
            observations_original.unit_cell().parameters(), cs)
        uc_scale_constrained = lph.prep_output(uc_scale_inp, cs)
        a, b, c, alpha, beta, gamma = uc_scale_constrained
        const_params_scale = (rotx, roty, ry, rz, r0, re, voigt_nu,
                              a, b, c, alpha, beta, gamma)
        xopt_scalefactors, stats = self.optimize_scalefactors(
            I_r_flex, observations_original, wavelength,
            crystal_init_orientation, alpha_angle, spot_pred_x_mm,
            spot_pred_y_mm, iparams, pres_in, observations_non_polar,
            detector_distance_mm, const_params_scale)
        G, B = xopt_scalefactors
    else:
        G, B, ry, rz, r0, re, voigt_nu, rotx, roty = (
            pres_in.G, pres_in.B, pres_in.ry, pres_in.rz, pres_in.r0,
            pres_in.re, pres_in.voigt_nu, 0.0, 0.0)
        a, b, c, alpha, beta, gamma = pres_in.unit_cell.parameters()
        crystal_init_orientation = pres_in.crystal_orientation

    # Filter by partiality.
    two_theta = observations_original_sel.two_theta(
        wavelength=wavelength).data()
    uc = unit_cell((a, b, c, alpha, beta, gamma))
    partiality_init, delta_xy_init, rs_init, rh_init = \
        ph.calc_partiality_anisotropy_set(
            uc, rotx, roty, observations_original_sel.indices(),
            ry, rz, r0, re, voigt_nu, two_theta, alpha_angle_sel, wavelength,
            crystal_init_orientation, spot_pred_x_mm_sel, spot_pred_y_mm_sel,
            detector_distance_mm, iparams.partiality_model,
            iparams.flag_beam_divergence)
    observations_original_sel, alpha_angle_sel, spot_pred_x_mm_sel, \
        spot_pred_y_mm_sel, I_ref_sel = self.get_filtered_data(
            'partiality', [pr_partiality_min], observations_original_sel,
            alpha_angle_sel, spot_pred_x_mm_sel, spot_pred_y_mm_sel,
            I_ref_sel, partiality_in=partiality_init)
    I_r_true = I_ref_sel[:]

    # Calculate the initial residual_xy error.
    const_params_uc = (G, B, rotx, roty, ry, rz, r0, re, voigt_nu)
    xinp_uc = lph.prep_input((a, b, c, alpha, beta, gamma), cs)
    args_uc = (I_r_true, observations_original_sel, wavelength,
               alpha_angle_sel, crystal_init_orientation, spot_pred_x_mm_sel,
               spot_pred_y_mm_sel, detector_distance_mm, 'unit_cell',
               const_params_uc, B, miller_array_iso, iparams)
    uc_params_err = lph.func(xinp_uc, args_uc)
    init_residual_xy_err = flex.sum(uc_params_err**2)

    # Calculate the initial residual_pr error.
    const_params_all = (G, B)
    xinp_all = flex.double([rotx, roty, ry, rz, r0, re, voigt_nu])
    xinp_all.extend(lph.prep_input((a, b, c, alpha, beta, gamma), cs))
    args_all = (I_r_true, observations_original_sel, wavelength,
                alpha_angle_sel, crystal_init_orientation, spot_pred_x_mm_sel,
                spot_pred_y_mm_sel, detector_distance_mm, 'allparams',
                const_params_all, B, miller_array_iso, iparams)
    all_params_err = lph.func(xinp_all, args_all)
    init_residual_err = flex.sum(all_params_err**2)

    # History of accepted residuals and parameter sets; the three lists are
    # always appended together, so their last entries correspond.
    t_pr_list = [init_residual_err]
    t_xy_list = [init_residual_xy_err]
    refined_params_hist = [(G, B, rotx, roty, ry, rz, r0, re, voigt_nu,
                            a, b, c, alpha, beta, gamma)]
    txt_out = ''

    for i_sub_cycle in range(iparams.n_postref_sub_cycle):
        for refine_mode in refine_steps:
            # Prepare data for this micro-cycle.
            init_params = (G, B, rotx, roty, ry, rz, r0, re, voigt_nu,
                           a, b, c, alpha, beta, gamma)
            observations_original_sel, alpha_angle_sel, spot_pred_x_mm_sel, \
                spot_pred_y_mm_sel, I_ref_sel = self.prepare_data_microcycle(
                    refine_mode, iparams, observations_original, alpha_angle,
                    spot_pred_x_mm, spot_pred_y_mm, I_r_flex, init_params,
                    crystal_init_orientation, wavelength,
                    detector_distance_mm)
            I_r_true = I_ref_sel[:]

            if refine_mode == 'crystal_orientation':
                xinp = flex.double([rotx, roty])
                const_params = (G, B, ry, rz, r0, re, voigt_nu,
                                a, b, c, alpha, beta, gamma)
            elif refine_mode == 'reflecting_range':
                xinp = flex.double([ry, rz, r0, re, voigt_nu])
                const_params = (G, B, rotx, roty, a, b, c, alpha, beta, gamma)
            elif refine_mode == 'unit_cell':
                xinp = lph.prep_input((a, b, c, alpha, beta, gamma), cs)
                const_params = (G, B, rotx, roty, ry, rz, r0, re, voigt_nu)
            elif refine_mode == 'allparams':
                xinp = flex.double([rotx, roty, ry, rz, r0, re, voigt_nu])
                xinp.extend(lph.prep_input((a, b, c, alpha, beta, gamma), cs))
                const_params = (G, B)

            args = (I_r_true, observations_original_sel, wavelength,
                    alpha_angle_sel, crystal_init_orientation,
                    spot_pred_x_mm_sel, spot_pred_y_mm_sel,
                    detector_distance_mm, refine_mode, const_params, B,
                    miller_array_iso, iparams)
            lh = lbfgs_handler(current_x=xinp, args=args)
            xopt = flex.double(list(lh.x))

            if refine_mode in ('crystal_orientation', 'reflecting_range',
                               'allparams'):
                current_residual_err = lh.f
                # Unpack the refined values, then evaluate residual_xy with
                # the unit-cell target at the current cell.
                if refine_mode == 'crystal_orientation':
                    rotx, roty = xopt
                elif refine_mode == 'reflecting_range':
                    ry, rz, r0, re, voigt_nu = xopt
                elif refine_mode == 'allparams':
                    rotx, roty, ry, rz, r0, re, voigt_nu = xopt[:7]
                    a, b, c, alpha, beta, gamma = lph.prep_output(
                        xopt[7:], cs)
                const_params_uc = (G, B, rotx, roty, ry, rz, r0, re, voigt_nu)
                xinp_uc = lph.prep_input((a, b, c, alpha, beta, gamma), cs)
                args_uc = (I_r_true, observations_original_sel, wavelength,
                           alpha_angle_sel, crystal_init_orientation,
                           spot_pred_x_mm_sel, spot_pred_y_mm_sel,
                           detector_distance_mm, 'unit_cell',
                           const_params_uc, B, miller_array_iso, iparams)
                uc_params_err = lph.func(xinp_uc, args_uc)
                current_residual_xy_err = flex.sum(uc_params_err**2)
            elif refine_mode == 'unit_cell':
                current_residual_xy_err = lh.f
                a, b, c, alpha, beta, gamma = lph.prep_output(xopt, cs)
                # Check the refined unit cell against the reference
                # intensity. The target must be evaluated at the vector
                # holding the refined cell; the stale ``xinp_all`` built
                # before the loop would always reproduce the initial residual.
                xinp_check = flex.double(
                    [rotx, roty, ry, rz, r0, re, voigt_nu])
                xinp_check.extend(
                    lph.prep_input((a, b, c, alpha, beta, gamma), cs))
                const_params_all = (G, B)
                args_all = (I_r_true, observations_original_sel, wavelength,
                            alpha_angle_sel, crystal_init_orientation,
                            spot_pred_x_mm_sel, spot_pred_y_mm_sel,
                            detector_distance_mm, 'allparams',
                            const_params_all, B, miller_array_iso, iparams)
                all_params_err = lph.func(xinp_check, args_all)
                current_residual_err = flex.sum(all_params_err**2)

            # Accept the step when the post-refine residual (and, except for
            # 'allparams', the xy residual too) stays within the configured
            # percentage of the last accepted value.
            last_pr = t_pr_list[-1]
            flag_success = current_residual_err < (
                last_pr + (last_pr*iparams.postref.residual_threshold/100))
            if flag_success and refine_mode != 'allparams':
                last_xy = t_xy_list[-1]
                flag_success = current_residual_xy_err < (
                    last_xy
                    + (last_xy*iparams.postref.residual_threshold_xy/100))

            if flag_success:
                t_pr_list.append(current_residual_err)
                t_xy_list.append(current_residual_xy_err)
                refined_params_hist.append(
                    (G, B, rotx, roty, ry, rz, r0, re, voigt_nu,
                     a, b, c, alpha, beta, gamma))
            else:
                # Roll back to the last accepted parameter set.
                G, B, rotx, roty, ry, rz, r0, re, voigt_nu, \
                    a, b, c, alpha, beta, gamma = refined_params_hist[-1]

            # NOTE: txt_out is accumulated but not returned by this method.
            tmp_txt_out = refine_mode + ' %3.0f %6.4f %6.4f %6.4f %6.4f %10.8f %10.8f %10.8f %10.8f %10.8f %6.3f %6.3f %.4g %6.3f\n' % (
                i_sub_cycle, G, B, rotx*180/math.pi, roty*180/math.pi,
                ry, rz, r0, re, voigt_nu, a, c, t_pr_list[-1], t_xy_list[-1])
            txt_out += tmp_txt_out

    # Apply the refined parameters on the full (original) reflection set.
    two_theta_array = observations_original.two_theta(wavelength=wavelength)
    two_theta = two_theta_array.data()
    sin_theta_over_lambda_sq = two_theta_array.sin_theta_over_lambda_sq().data()
    if pres_in is None:
        partiality_init, delta_xy_init, rs_init, rh_init = \
            ph.calc_partiality_anisotropy_set(
                observations_original.unit_cell(), 0.0, 0.0,
                observations_original.indices(), 0, 0, spot_radius,
                iparams.gamma_e, iparams.voigt_nu, two_theta, alpha_angle,
                wavelength, crystal_init_orientation, spot_pred_x_mm,
                spot_pred_y_mm, detector_distance_mm,
                iparams.partiality_model, iparams.flag_beam_divergence)
        I_o_init = ph.calc_full_refl(
            observations_original.data(), sin_theta_over_lambda_sq,
            1, 0, partiality_init, rs_init)
    else:
        partiality_init, delta_xy_init, rs_init, rh_init = \
            ph.calc_partiality_anisotropy_set(
                pres_in.unit_cell, 0.0, 0.0,
                observations_original.indices(), pres_in.ry, pres_in.rz,
                pres_in.r0, pres_in.re, pres_in.voigt_nu, two_theta,
                alpha_angle, wavelength, crystal_init_orientation,
                spot_pred_x_mm, spot_pred_y_mm, detector_distance_mm,
                iparams.partiality_model, iparams.flag_beam_divergence)
        I_o_init = ph.calc_full_refl(
            observations_original.data(), sin_theta_over_lambda_sq,
            pres_in.G, pres_in.B, partiality_init, rs_init)

    partiality_fin, delta_xy_fin, rs_fin, rh_fin = \
        ph.calc_partiality_anisotropy_set(
            unit_cell((a, b, c, alpha, beta, gamma)), rotx, roty,
            observations_original.indices(), ry, rz, r0, re, voigt_nu,
            two_theta, alpha_angle, wavelength, crystal_init_orientation,
            spot_pred_x_mm, spot_pred_y_mm, detector_distance_mm,
            iparams.partiality_model, iparams.flag_beam_divergence)
    I_o_fin = ph.calc_full_refl(
        observations_original.data(), sin_theta_over_lambda_sq,
        G, B, partiality_fin, rs_fin)

    SE_of_the_estimate = standard_error_of_the_estimate(I_r_flex, I_o_fin, 13)
    R_sq = coefficient_of_determination(I_r_flex, I_o_fin)*100
    CC_init = flex.linear_correlation(I_r_flex, I_o_init).coefficient()
    CC_final = flex.linear_correlation(I_r_flex, I_o_fin).coefficient()
    err_init = (I_r_flex - I_o_init)/observations_original.sigmas()
    R_init = math.sqrt(flex.sum(err_init**2))
    err_final = (I_r_flex - I_o_fin)/observations_original.sigmas()
    R_final = math.sqrt(flex.sum(err_final**2))
    R_xy_init = math.sqrt(flex.sum(delta_xy_init**2))
    R_xy_final = math.sqrt(flex.sum(delta_xy_fin**2))

    # Refinement made things worse (or gamma_e blew up): report the initial
    # statistics and fall back to the starting parameters.
    if R_init < R_final or re > (iparams.gamma_e * 10):
        CC_final = CC_init
        R_final = R_init
        R_xy_final = R_xy_init
        # NOTE(review): voigt_nu is not reset in either fallback branch, so
        # the refined value is returned alongside the starting parameters;
        # confirm whether that is intended.
        if pres_in is None:
            G, B, r0, ry, rz, re, rotx, roty = (
                1.0, 0.0, spot_radius, 0.0, 0.0, iparams.gamma_e, 0.0, 0.0)
            a, b, c, alpha, beta, gamma = \
                observations_original.unit_cell().parameters()
        else:
            G, B, r0, ry, rz, re, rotx, roty = (
                pres_in.G, pres_in.B, pres_in.r0, pres_in.ry, pres_in.rz,
                pres_in.re, 0.0, 0.0)
            a, b, c, alpha, beta, gamma = pres_in.unit_cell.parameters()
            crystal_init_orientation = pres_in.crystal_orientation

    # Calculate CCiso if hklisoin is given. miller_array_iso is never set
    # above, so as written the inner branch does not execute.
    CC_iso_init, CC_iso_final = (0, 0)
    if iparams.hklisoin is not None:
        if miller_array_iso is not None:
            from cctbx import miller
            matches = miller.match_multi_indices(
                miller_indices_unique=miller_array_iso.indices(),
                miller_indices=observations_non_polar.indices())
            pairs = matches.pairs()
            I_iso_match = flex.double(
                [miller_array_iso.data()[pair[0]] for pair in pairs])
            I_o_init_match = flex.double(
                [I_o_init[pair[1]] for pair in pairs])
            I_o_fin_match = flex.double(
                [I_o_fin[pair[1]] for pair in pairs])
            CC_iso_init = flex.linear_correlation(
                I_iso_match, I_o_init_match).coefficient()
            CC_iso_final = flex.linear_correlation(
                I_iso_match, I_o_fin_match).coefficient()

    xopt = (G, B, rotx, roty, ry, rz, r0, re, voigt_nu,
            a, b, c, alpha, beta, gamma)
    return xopt, (SE_of_the_estimate, R_sq, CC_init, CC_final, R_init,
                  R_final, R_xy_init, R_xy_final, CC_iso_init,
                  CC_iso_final), len(I_ref_sel)
def postrefine_by_frame(self, frame_no, pres_in, iparams, miller_array_ref,
                        avg_mode):
    """Post-refine a single frame against the reference set.

    ``pres_in`` is the previous result for this frame; its
    ``pickle_filename`` names the integration pickle to load. Only
    observations whose (non-polar) indices are present in
    ``miller_array_ref`` are passed to the least-squares refinement; the
    refined parameters are then applied to all observations of the frame and
    reflections with partiality at or below ``iparams.merge.partiality_min``
    are dropped.

    Returns ``(pres, text)`` where ``pres`` is a populated
    ``postref_results`` (or None when the frame could not be processed) and
    ``text`` is a one-line summary or failure message.
    """
    # Prepare data.
    if pres_in is None:
        return None, 'Found empty pickle file'
    pickle_filename = pres_in.pickle_filename
    # NOTE: pickle can execute arbitrary code while loading; only feed this
    # files produced by a trusted integration run.
    # Use a context manager so the file handle is closed deterministically.
    with open(pickle_filename, "rb") as pickle_file:
        observations_pickle = pickle.load(pickle_file)
    wavelength = observations_pickle["wavelength"]
    crystal_init_orientation = observations_pickle["current_orientation"][0]
    img_filename_only = pickle_filename.split('/')[-1]
    txt_exception = ' {0:40} ==> '.format(img_filename_only)

    inputs, txt_organize_input = self.organize_input(
        observations_pickle, iparams, avg_mode,
        pickle_filename=pickle_filename)
    if inputs is None:
        txt_exception += txt_organize_input + '\n'
        return None, txt_exception
    observations_original, alpha_angle, spot_pred_x_mm, spot_pred_y_mm, \
        detector_distance_mm, identified_isoform, mapped_predictions, \
        xbeam, ybeam = inputs

    # Select data for post-refinement (only indices that are common with
    # the reference set).
    observations_non_polar, index_basis_name = self.get_observations_non_polar(
        observations_original, pickle_filename, iparams)
    matches = miller.match_multi_indices(
        miller_indices_unique=miller_array_ref.indices(),
        miller_indices=observations_non_polar.indices())
    pairs = matches.pairs()
    ref_data = miller_array_ref.data()
    obs_data = observations_non_polar.data()
    obs_sigmas = observations_non_polar.sigmas()
    original_indices = observations_original.indices()
    non_polar_indices = observations_non_polar.indices()

    I_ref_match = flex.double([ref_data[pair[0]] for pair in pairs])
    I_obs_match = flex.double([obs_data[pair[1]] for pair in pairs])
    sigI_obs_match = flex.double([obs_sigmas[pair[1]] for pair in pairs])
    miller_indices_original_obs_match = flex.miller_index(
        (original_indices[pair[1]] for pair in pairs))
    miller_indices_non_polar_obs_match = flex.miller_index(
        (non_polar_indices[pair[1]] for pair in pairs))
    alpha_angle_set = flex.double([alpha_angle[pair[1]] for pair in pairs])
    spot_pred_x_mm_set = flex.double(
        [spot_pred_x_mm[pair[1]] for pair in pairs])
    spot_pred_y_mm_set = flex.double(
        [spot_pred_y_mm[pair[1]] for pair in pairs])
    observations_original_sel = observations_original.customized_copy(
        data=I_obs_match, sigmas=sigI_obs_match,
        indices=miller_indices_original_obs_match)
    observations_non_polar_sel = observations_non_polar.customized_copy(
        data=I_obs_match, sigmas=sigI_obs_match,
        indices=miller_indices_non_polar_obs_match)

    # Do least-squares refinement.
    lsqrh = leastsqr_handler()
    try:
        refined_params, stats, n_refl_postrefined = lsqrh.optimize(
            I_ref_match, observations_original_sel, wavelength,
            crystal_init_orientation, alpha_angle_set, spot_pred_x_mm_set,
            spot_pred_y_mm_set, iparams, pres_in, observations_non_polar_sel,
            detector_distance_mm)
    except Exception:
        # Any failure of the optimizer rejects this frame; the caller gets
        # the message instead of an exception.
        txt_exception += 'optimization failed.\n'
        return None, txt_exception

    # Calculate partiality for output (with target_anomalous check).
    G_fin, B_fin, rotx_fin, roty_fin, ry_fin, rz_fin, r0_fin, re_fin, \
        voigt_nu_fin, a_fin, b_fin, c_fin, alpha_fin, beta_fin, gamma_fin = \
        refined_params
    inputs, txt_organize_input = self.organize_input(
        observations_pickle, iparams, avg_mode,
        pickle_filename=pickle_filename)
    observations_original, alpha_angle, spot_pred_x_mm, spot_pred_y_mm, \
        detector_distance_mm, identified_isoform, mapped_predictions, \
        xbeam, ybeam = inputs
    observations_non_polar, index_basis_name = self.get_observations_non_polar(
        observations_original, pickle_filename, iparams)
    from cctbx.uctbx import unit_cell
    uc_fin = unit_cell(
        (a_fin, b_fin, c_fin, alpha_fin, beta_fin, gamma_fin))
    crystal_init_orientation = pres_in.crystal_orientation
    two_theta = observations_original.two_theta(wavelength=wavelength).data()
    ph = partiality_handler()
    partiality_fin, dummy, rs_fin, rh_fin = ph.calc_partiality_anisotropy_set(
        uc_fin, rotx_fin, roty_fin, observations_original.indices(),
        ry_fin, rz_fin, r0_fin, re_fin, voigt_nu_fin, two_theta, alpha_angle,
        wavelength, crystal_init_orientation, spot_pred_x_mm, spot_pred_y_mm,
        detector_distance_mm, iparams.partiality_model,
        iparams.flag_beam_divergence)

    # Calculate the new crystal orientation.
    # NOTE(review): crystal_fin_orientation is computed but the result below
    # is stored with crystal_init_orientation; confirm which is intended.
    O = sqr(uc_fin.orthogonalization_matrix()).transpose()
    R = sqr(crystal_init_orientation.crystal_rotation_matrix()).transpose()
    from cctbx.crystal_orientation import crystal_orientation, basis_type
    CO = crystal_orientation(O*R, basis_type.direct)
    crystal_fin_orientation = CO.rotate_thru(
        (1, 0, 0), rotx_fin).rotate_thru((0, 1, 0), roty_fin)

    # Remove reflections with partiality below threshold.
    i_sel = partiality_fin > iparams.merge.partiality_min
    partiality_fin_sel = partiality_fin.select(i_sel)
    rs_fin_sel = rs_fin.select(i_sel)
    rh_fin_sel = rh_fin.select(i_sel)
    observations_non_polar_sel = observations_non_polar.select(i_sel)
    observations_original_sel = observations_original.select(i_sel)
    mapped_predictions = mapped_predictions.select(i_sel)

    pres = postref_results()
    pres.set_params(
        observations=observations_non_polar_sel,
        observations_original=observations_original_sel,
        refined_params=refined_params,
        stats=stats,
        partiality=partiality_fin_sel,
        rs_set=rs_fin_sel,
        rh_set=rh_fin_sel,
        frame_no=frame_no,
        pickle_filename=pickle_filename,
        wavelength=wavelength,
        crystal_orientation=crystal_init_orientation,
        detector_distance_mm=detector_distance_mm,
        identified_isoform=identified_isoform,
        mapped_predictions=mapped_predictions,
        xbeam=xbeam,
        ybeam=ybeam)

    # Percentage changes for the summary line. Best effort: a statistic
    # whose calculation fails (e.g. zero initial value), and every one
    # after it, keeps the default 0.
    r_change, r_xy_change, cc_change, cc_iso_change = (0, 0, 0, 0)
    try:
        r_change = ((pres.R_final - pres.R_init)/pres.R_init)*100
        r_xy_change = ((pres.R_xy_final - pres.R_xy_init)/pres.R_xy_init)*100
        cc_change = ((pres.CC_final - pres.CC_init)/pres.CC_init)*100
        cc_iso_change = (
            (pres.CC_iso_final - pres.CC_iso_init)/pres.CC_iso_init)*100
    except Exception:
        pass

    txt_postref = (
        ' {0:40} ==> RES:{1:5.2f} NREFL:{2:5d} R:{3:8.2f}% RXY:{4:8.2f}% '
        'CC:{5:6.2f}% CCISO:{6:6.2f}% G:{7:10.3e} B:{8:7.1f} '
        'CELL:{9:6.2f} {10:6.2f} {11:6.2f} {12:6.2f} {13:6.2f} {14:6.2f}'
    ).format(
        img_filename_only+' ('+index_basis_name+')',
        observations_original_sel.d_min(),
        len(observations_original_sel.data()),
        r_change, r_xy_change, cc_change, cc_iso_change,
        pres.G, pres.B,
        a_fin, b_fin, c_fin, alpha_fin, beta_fin, gamma_fin)
    print(txt_postref)
    txt_postref += '\n'
    return pres, txt_postref
def __init__(self, measurements_orig, params, i_model, miller_set, result,
             out):
    """Set up the rs/nave1 refineries for one frame of measurements.

    ``measurements_orig`` is deep-copied and re-expressed in the crystal
    symmetry of ``miller_set``; observations are matched against
    ``miller_set`` and only the matching ones are kept. ``i_model`` supplies
    the reference intensities and must carry exactly the same indices as
    ``miller_set``. ``result`` provides ``"current_orientation"`` and
    ``"wavelength"``. Diagnostic output is printed to ``out``.

    Requires ``params.postrefinement.algorithm == "rs_hybrid"``.
    """
    measurements = measurements_orig.deep_copy()

    # Now manipulate the data to conform to unit cell, asu, and space group
    # of reference. The resolution will be cut later.
    # Only works if there is NOT an indexing ambiguity!
    observations = measurements.customized_copy(
        anomalous_flag=not params.merge_anomalous,
        crystal_symmetry=miller_set.crystal_symmetry()
    ).map_to_asu()

    observations_original_index = measurements.customized_copy(
        anomalous_flag=not params.merge_anomalous,
        crystal_symmetry=miller_set.crystal_symmetry()
    )

    # Ensure that match_multi_indices() will return identical results
    # when a frame's observations are matched against the
    # pre-generated Miller set, self.miller_set, and the reference
    # data set, self.i_model. The implication is that the same match
    # can be used to map Miller indices to array indices for intensity
    # accumulation, and for determination of the correlation
    # coefficient in the presence of a scaling reference.
    assert len(i_model.indices()) == len(miller_set.indices()) \
        and (i_model.indices() == miller_set.indices()).count(False) == 0

    matches = miller.match_multi_indices(
        miller_indices_unique=miller_set.indices(),
        miller_indices=observations.indices())
    pairs = matches.pairs()

    # Positions (within the observations) of every matched reflection.
    pair1 = flex.int([pair[1] for pair in pairs])

    # Narrow things down to the set that matches, only.
    observations_pair1_selected = observations.customized_copy(
        indices=flex.miller_index(
            [observations.indices()[p] for p in pair1]),
        data=flex.double([observations.data()[p] for p in pair1]),
        sigmas=flex.double([observations.sigmas()[p] for p in pair1]),
    )
    observations_original_index_pair1_selected = \
        observations_original_index.customized_copy(
            indices=flex.miller_index(
                [observations_original_index.indices()[p] for p in pair1]),
            data=flex.double(
                [observations_original_index.data()[p] for p in pair1]),
            sigmas=flex.double(
                [observations_original_index.sigmas()[p] for p in pair1]),
        )

    I_observed = observations_pair1_selected.data()
    chosen = chosen_weights(observations_pair1_selected, params)

    MILLER = observations_original_index_pair1_selected.indices()
    ORI = result["current_orientation"][0]
    Astar = matrix.sqr(ORI.reciprocal_matrix())
    WAVE = result["wavelength"]
    BEAM = matrix.col((0.0, 0.0, -1./WAVE))
    BFACTOR = 0.

    # Calculation of correlation here.
    I_reference = flex.double([i_model.data()[pair[0]] for pair in pairs])
    use_weights = False  # New facility for getting variance-weighted correlation

    if use_weights:
        # Variance weighting. The selected array is already narrowed to the
        # matched reflections (one entry per pair, in pair order), so it is
        # walked directly; indexing it with the pre-selection positions
        # pair[1] would pick the wrong entries or run out of range.
        I_weight = flex.double(
            [1./sigma**2 for sigma in observations_pair1_selected.sigmas()])
    else:
        I_weight = flex.double(len(observations_pair1_selected.sigmas()), 1.)

    # Explanation of 'include_negatives' semantics as originally implemented
    # in cxi.merge postrefinement:
    #   include_negatives = True
    #     + and - reflections both used for Rh distribution for initial
    #       estimate of RS parameter
    #     + and - reflections both used for calc/obs correlation slope for
    #       initial estimate of G parameter
    #     + and - reflections both passed to the refinery and used in the
    #       target function (makes sense if you look at it from a certain
    #       point of view)
    #
    #   include_negatives = False
    #     + and - reflections both used for Rh distribution for initial
    #       estimate of RS parameter
    #     + reflections only used for calc/obs correlation slope for initial
    #       estimate of G parameter
    #     + and - reflections both passed to the refinery and used in the
    #       target function (makes sense if you look at it from a certain
    #       point of view)
    if params.include_negatives:
        SWC = simple_weighted_correlation(I_weight, I_reference, I_observed)
    else:
        non_positive = (observations_pair1_selected.data() <= 0)
        SWC = simple_weighted_correlation(
            I_weight.select(~non_positive),
            I_reference.select(~non_positive),
            I_observed.select(~non_positive))

    # print() with file= matches the form already used elsewhere in this
    # file; the Python 2 ``print >> out`` syntax does not compile on Python 3.
    print("Old correlation is", SWC.corr, file=out)

    assert params.postrefinement.algorithm == "rs_hybrid"

    # Rh: distance of each reciprocal-lattice point from the Ewald sphere.
    Rhall = flex.double()
    for mill in MILLER:
        H = matrix.col(mill)
        Xhkl = Astar*H
        Rh = (Xhkl + BEAM).length() - (1./WAVE)
        Rhall.append(Rh)
    Rs = math.sqrt(flex.mean(Rhall*Rhall))

    # The earlier fixed guess of 1./10000. (reciprocal effective domain size
    # of 1 micron) was immediately overwritten; the empirically determined
    # approximate, monochrome, a-mosaic value is what is actually used.
    RS = Rs

    self.rs2_current = flex.double([SWC.slope, BFACTOR, RS, 0., 0.])
    self.rs2_parameterization_class = rs_parameterization

    self.rs2_refinery = rs2_refinery(
        ORI=ORI, MILLER=MILLER, BEAM=BEAM, WAVE=WAVE,
        ICALCVEC=I_reference, IOBSVEC=I_observed, WEIGHTS=chosen)
    self.rs2_refinery.set_profile_shape(params.postrefinement.lineshape)
    self.nave1_refinery = nave1_refinery(
        ORI=ORI, MILLER=MILLER, BEAM=BEAM, WAVE=WAVE,
        ICALCVEC=I_reference, IOBSVEC=I_observed, WEIGHTS=chosen)
    self.nave1_refinery.set_profile_shape(params.postrefinement.lineshape)

    self.out = out
    self.params = params
    self.miller_set = miller_set
    self.observations_pair1_selected = observations_pair1_selected
    self.observations_original_index_pair1_selected = \
        observations_original_index_pair1_selected
    self.i_model = i_model
def __init__(self, measurements_orig, params, i_model, miller_set, result, out):
    """Prepare starting parameters and a refinery for post-refining one frame.

    A deep copy of ``measurements_orig`` is recast into the crystal symmetry
    of ``miller_set`` twice: once mapped to the asymmetric unit and once with
    the original indices.  Both copies are reduced to the observations that
    ``miller.match_multi_indices`` pairs with ``miller_set``.  A weighted
    correlation of the paired observations against ``i_model`` provides the
    initial scale (``SWC.slope``), and the parameter vector, parameterization
    class and refinery for ``params.postrefinement.algorithm`` are stored on
    ``self``.

    Parameters:
      measurements_orig: miller array of the frame's measurements; it is
        deep-copied, never modified.
      params: parameter object; ``merge_anomalous``, ``include_negatives``
        and ``postrefinement.algorithm`` are read.
      i_model: reference array; its indices must equal those of
        ``miller_set``.  Entries with negative sigmas get zero weight.
      miller_set: pre-generated Miller set supplying symmetry and indices.
      result: mapping read for ``"current_orientation"``, ``"wavelength"``
        and, for ``"eta_deff"``, ``"ML_half_mosaicity_deg"`` and
        ``"ML_domain_size_ang"``.
      out: file-like object receiving the diagnostic output.

    Raises:
      AssertionError: if ``i_model`` and ``miller_set`` indices differ.
      ValueError: if ``params.postrefinement.algorithm`` is neither ``"rs"``
        nor ``"eta_deff"``.
    """
    measurements = measurements_orig.deep_copy()

    # Now manipulate the data to conform to unit cell, asu, and space group
    # of reference.  The resolution will be cut later.
    # Only works if there is NOT an indexing ambiguity!
    observations = measurements.customized_copy(
        anomalous_flag=not params.merge_anomalous,
        crystal_symmetry=miller_set.crystal_symmetry()).map_to_asu()

    observations_original_index = measurements.customized_copy(
        anomalous_flag=not params.merge_anomalous,
        crystal_symmetry=miller_set.crystal_symmetry())

    # Ensure that match_multi_indices() will return identical results
    # when a frame's observations are matched against the
    # pre-generated Miller set, self.miller_set, and the reference
    # data set, self.i_model.  The implication is that the same match
    # can be used to map Miller indices to array indices for intensity
    # accumulation, and for determination of the correlation
    # coefficient in the presence of a scaling reference.
    assert len(i_model.indices()) == len(miller_set.indices()) \
        and (i_model.indices() == miller_set.indices()).count(False) == 0

    matches = miller.match_multi_indices(
        miller_indices_unique=miller_set.indices(),
        miller_indices=observations.indices())
    pairs = matches.pairs()

    # pair[0] indexes miller_set / i_model, pair[1] indexes the observations.
    pair1 = flex.int([pair[1] for pair in pairs])

    # narrow things down to the set that matches, only
    asu_indices = observations.indices()
    asu_data = observations.data()
    asu_sigmas = observations.sigmas()
    observations_pair1_selected = observations.customized_copy(
        indices=flex.miller_index([asu_indices[p] for p in pair1]),
        data=flex.double([asu_data[p] for p in pair1]),
        sigmas=flex.double([asu_sigmas[p] for p in pair1]),
    )

    orig_indices = observations_original_index.indices()
    orig_data = observations_original_index.data()
    orig_sigmas = observations_original_index.sigmas()
    observations_original_index_pair1_selected = \
        observations_original_index.customized_copy(
            indices=flex.miller_index([orig_indices[p] for p in pair1]),
            data=flex.double([orig_data[p] for p in pair1]),
            sigmas=flex.double([orig_sigmas[p] for p in pair1]),
        )

    ###################
    I_observed = observations_pair1_selected.data()
    MILLER = observations_original_index_pair1_selected.indices()
    ORI = result["current_orientation"][0]
    Astar = matrix.sqr(ORI.reciprocal_matrix())
    WAVE = result["wavelength"]
    BEAM = matrix.col((0.0, 0.0, -1. / WAVE))
    BFACTOR = 0.

    # calculation of correlation here
    model_data = i_model.data()
    model_sigmas = i_model.sigmas()
    I_reference = flex.double([model_data[pair[0]] for pair in pairs])
    I_invalid = flex.bool([model_sigmas[pair[0]] < 0. for pair in pairs])

    use_weights = False  # New facility for getting variance-weighted correlation
    if use_weights:
        # Variance weighting.  The selected array already holds exactly one
        # entry per pair, in pair order, so its sigmas are walked directly;
        # indexing it with pair[1] (an index into the unselected
        # observations) would pick the wrong element or run off the end.
        I_weight = flex.double(
            [1. / sigma ** 2
             for sigma in observations_pair1_selected.sigmas()])
    else:
        I_weight = flex.double(len(observations_pair1_selected.sigmas()), 1.)
    I_weight.set_selected(I_invalid, 0.)

    # Explanation of 'include_negatives' semantics as originally implemented
    # in cxi.merge postrefinement:
    #   include_negatives = True
    #   + and - reflections both used for Rh distribution for initial
    #           estimate of RS parameter
    #   + and - reflections both used for calc/obs correlation slope for
    #           initial estimate of G parameter
    #   + and - reflections both passed to the refinery and used in the
    #           target function (makes sense if you look at it from a
    #           certain point of view)
    #
    #   include_negatives = False
    #   + and - reflections both used for Rh distribution for initial
    #           estimate of RS parameter
    #   +       reflections only used for calc/obs correlation slope for
    #           initial estimate of G parameter
    #   + and - reflections both passed to the refinery and used in the
    #           target function (makes sense if you look at it from a
    #           certain point of view)
    if params.include_negatives:
        SWC = simple_weighted_correlation(I_weight, I_reference, I_observed)
    else:
        non_positive = (observations_pair1_selected.data() <= 0)
        positive = ~non_positive
        SWC = simple_weighted_correlation(
            I_weight.select(positive),
            I_reference.select(positive),
            I_observed.select(positive))

    print >> out, "Old correlation is", SWC.corr

    algorithm = params.postrefinement.algorithm
    if algorithm == "rs":
        Rhall = flex.double()
        for mill in MILLER:
            H = matrix.col(mill)
            Xhkl = Astar * H
            Rh = (Xhkl + BEAM).length() - (1. / WAVE)
            Rhall.append(Rh)
        Rs = math.sqrt(flex.mean(Rhall * Rhall))

        # A fixed 1./10000. (reciprocal effective domain size of 1 micron)
        # used to be assigned first and was immediately overwritten; only
        # the empirically determined approximate, monochrome, a-mosaic
        # value is kept.
        RS = Rs
        current = flex.double([SWC.slope, BFACTOR, RS, 0., 0.])

        parameterization_class = rs_parameterization
        refinery = rs_refinery(ORI=ORI, MILLER=MILLER, BEAM=BEAM, WAVE=WAVE,
                               ICALCVEC=I_reference, IOBSVEC=I_observed)

    elif algorithm == "eta_deff":
        eta_init = 2. * result["ML_half_mosaicity_deg"][0] * math.pi / 180.
        D_eff_init = 2. * result["ML_domain_size_ang"][0]
        current = flex.double([
            SWC.slope,
            BFACTOR,
            eta_init,
            0.,
            0.,
            D_eff_init,
        ])

        parameterization_class = eta_deff_parameterization
        refinery = eta_deff_refinery(ORI=ORI, MILLER=MILLER, BEAM=BEAM,
                                     WAVE=WAVE, ICALCVEC=I_reference,
                                     IOBSVEC=I_observed)

    else:
        # Without this branch the code below failed with a NameError on
        # 'refinery', hiding the real cause.
        raise ValueError(
            "Unsupported postrefinement algorithm: %r" % (algorithm,))

    func = refinery.fvec_callable(parameterization_class(current))
    functional = flex.sum(func * func)
    print >> out, "functional", functional

    self.current = current
    self.parameterization_class = parameterization_class
    self.refinery = refinery
    self.out = out
    self.params = params
    self.miller_set = miller_set
    self.observations_pair1_selected = observations_pair1_selected
    self.observations_original_index_pair1_selected = \
        observations_original_index_pair1_selected
def run(self, experiments, reflections):
    """Post-refine every experiment and return the survivors.

    For each experiment the reflections whose ``'exp_id'`` equals the
    experiment identifier are matched against
    ``self.params.scaling.miller_set``, an initial scale is estimated from
    the correlation with ``self.params.scaling.i_model``, and the refinery
    for ``self.params.postrefinement.algorithm`` is prepared on ``self``
    before ``self.run_plain()`` is called.  An experiment whose refinement
    raises ``AssertionError``, ``ValueError`` or ``RuntimeError`` is dropped
    and counted under ``repr`` of the exception.  Rejection statistics are
    logged per rank and, summed over all ranks, on rank 0.

    Parameters:
      experiments: iterable of experiments providing ``identifier``,
        ``crystal`` and ``beam``.
      reflections: reflection table with the columns ``'exp_id'``,
        ``'miller_index'``, ``'miller_index_asymmetric'``,
        ``'intensity.sum.value'`` and ``'intensity.sum.variance'``.

    Returns:
      ``(new_experiments, new_reflections)``.  When post-refinement is
      disabled the two inputs are returned unchanged.

    Raises:
      AssertionError: if the reference model and Miller set indices differ.
      ValueError: if the configured algorithm is neither ``"rs"`` nor
        ``"eta_deff"``.
    """
    self.logger.log_step_time("POSTREFINEMENT")
    if not self.params.postrefinement.enable:
        self.logger.log("Postrefinement was not done")
        if self.mpi_helper.rank == 0:
            self.logger.main_log("Postrefinement was not done")
        return experiments, reflections

    target_symm = symmetry(
        unit_cell=self.params.scaling.unit_cell,
        space_group_info=self.params.scaling.space_group)
    i_model = self.params.scaling.i_model
    miller_set = self.params.scaling.miller_set

    # Ensure that match_multi_indices() will return identical results
    # when a frame's observations are matched against the
    # pre-generated Miller set, self.miller_set, and the reference
    # data set, self.i_model.  The implication is that the same match
    # can be used to map Miller indices to array indices for intensity
    # accumulation, and for determination of the correlation
    # coefficient in the presence of a scaling reference.
    assert len(i_model.indices()) == len(miller_set.indices())
    assert (i_model.indices() == miller_set.indices()).count(False) == 0

    new_experiments = ExperimentList()
    new_reflections = flex.reflection_table()

    experiments_rejected_by_reason = {}  # reason:how_many_rejected

    for experiment in experiments:
        exp_reflections = reflections.select(
            reflections['exp_id'] == experiment.identifier)

        # Build a miller array for the experiment reflections with original
        # miller indexes
        exp_miller_indices_original = miller.set(
            target_symm, exp_reflections['miller_index'], True)
        observations_original_index = miller.array(
            exp_miller_indices_original,
            exp_reflections['intensity.sum.value'],
            flex.double(
                flex.sqrt(exp_reflections['intensity.sum.variance'])))

        assert exp_reflections.size() == exp_miller_indices_original.size()
        assert observations_original_index.size() == \
            exp_miller_indices_original.size()

        # Build a miller array for the experiment reflections with asu
        # miller indexes
        exp_miller_indices_asu = miller.set(
            target_symm, exp_reflections['miller_index_asymmetric'], True)
        observations = miller.array(
            exp_miller_indices_asu,
            exp_reflections['intensity.sum.value'],
            flex.double(
                flex.sqrt(exp_reflections['intensity.sum.variance'])))

        matches = miller.match_multi_indices(
            miller_indices_unique=miller_set.indices(),
            miller_indices=observations.indices())
        pairs = matches.pairs()

        # pair[1] refers to the observations, pair[0] refers to the model
        pair1 = flex.int([pair[1] for pair in pairs])

        # narrow things down to the set that matches, only
        asu_indices = observations.indices()
        asu_data = observations.data()
        asu_sigmas = observations.sigmas()
        observations_pair1_selected = observations.customized_copy(
            indices=flex.miller_index([asu_indices[p] for p in pair1]),
            data=flex.double([asu_data[p] for p in pair1]),
            sigmas=flex.double([asu_sigmas[p] for p in pair1]))

        orig_indices = observations_original_index.indices()
        orig_data = observations_original_index.data()
        orig_sigmas = observations_original_index.sigmas()
        observations_original_index_pair1_selected = \
            observations_original_index.customized_copy(
                indices=flex.miller_index([orig_indices[p] for p in pair1]),
                data=flex.double([orig_data[p] for p in pair1]),
                sigmas=flex.double([orig_sigmas[p] for p in pair1]))

        I_observed = observations_pair1_selected.data()
        MILLER = observations_original_index_pair1_selected.indices()

        ORI = crystal_orientation(experiment.crystal.get_A(),
                                  basis_type.reciprocal)
        Astar = matrix.sqr(ORI.reciprocal_matrix())
        Astar_from_experiment = matrix.sqr(experiment.crystal.get_A())
        assert Astar == Astar_from_experiment

        WAVE = experiment.beam.get_wavelength()
        BEAM = matrix.col((0.0, 0.0, -1. / WAVE))
        BFACTOR = 0.
        MOSAICITY_DEG = experiment.crystal.get_half_mosaicity_deg()
        DOMAIN_SIZE_A = experiment.crystal.get_domain_size_ang()

        # calculation of correlation here
        model_data = i_model.data()
        model_sigmas = i_model.sigmas()
        I_reference = flex.double([model_data[pair[0]] for pair in pairs])
        I_invalid = flex.bool(
            [model_sigmas[pair[0]] < 0. for pair in pairs])

        use_weights = False  # New facility for getting variance-weighted correlation
        if use_weights:
            # Variance weighting.  The selected array already holds one entry
            # per pair, in pair order, so its sigmas are walked directly;
            # indexing it with pair[1] (an index into the unselected
            # observations) would pick the wrong element or run off the end.
            I_weight = flex.double(
                [1. / sigma ** 2
                 for sigma in observations_pair1_selected.sigmas()])
        else:
            I_weight = flex.double(
                len(observations_pair1_selected.sigmas()), 1.)
        I_weight.set_selected(I_invalid, 0.)

        # Explanation of 'include_negatives' semantics as originally
        # implemented in cxi.merge postrefinement:
        #   include_negatives = True
        #   + and - reflections both used for Rh distribution for initial
        #           estimate of RS parameter
        #   + and - reflections both used for calc/obs correlation slope for
        #           initial estimate of G parameter
        #   + and - reflections both passed to the refinery and used in the
        #           target function (makes sense if you look at it from a
        #           certain point of view)
        #
        #   include_negatives = False
        #   + and - reflections both used for Rh distribution for initial
        #           estimate of RS parameter
        #   +       reflections only used for calc/obs correlation slope for
        #           initial estimate of G parameter
        #   + and - reflections both passed to the refinery and used in the
        #           target function (makes sense if you look at it from a
        #           certain point of view)

        # RB: By design, for MPI-Merge "include negatives" is implicitly True
        SWC = simple_weighted_correlation(I_weight, I_reference, I_observed)
        if self.params.output.log_level == 0:
            self.logger.log("Old correlation is: %f" % SWC.corr)

        algorithm = self.params.postrefinement.algorithm
        if algorithm == "rs":
            Rhall = flex.double()
            for mill in MILLER:
                H = matrix.col(mill)
                Xhkl = Astar * H
                Rh = (Xhkl + BEAM).length() - (1. / WAVE)
                Rhall.append(Rh)
            Rs = math.sqrt(flex.mean(Rhall * Rhall))

            # A fixed 1./10000. (reciprocal effective domain size of 1
            # micron) used to be assigned first and was immediately
            # overwritten; only the empirically determined approximate,
            # monochrome, a-mosaic value is kept.
            RS = Rs

            current = flex.double([SWC.slope, BFACTOR, RS, 0., 0.])

            parameterization_class = rs_parameterization
            refinery = rs_refinery(ORI=ORI, MILLER=MILLER, BEAM=BEAM,
                                   WAVE=WAVE, ICALCVEC=I_reference,
                                   IOBSVEC=I_observed)

        elif algorithm == "eta_deff":
            eta_init = 2. * MOSAICITY_DEG * math.pi / 180.
            D_eff_init = 2. * DOMAIN_SIZE_A
            current = flex.double(
                [SWC.slope, BFACTOR, eta_init, 0., 0., D_eff_init])

            parameterization_class = eta_deff_parameterization
            refinery = eta_deff_refinery(ORI=ORI, MILLER=MILLER, BEAM=BEAM,
                                         WAVE=WAVE, ICALCVEC=I_reference,
                                         IOBSVEC=I_observed)

        else:
            # Previously this fell through to a NameError on the first
            # experiment (or silently reused the previous experiment's
            # refinery on later ones).
            raise ValueError(
                "Unsupported postrefinement algorithm: %r" % (algorithm,))

        func = refinery.fvec_callable(parameterization_class(current))
        functional = flex.sum(func * func)

        if self.params.output.log_level == 0:
            self.logger.log("functional: %f" % functional)

        self.current = current
        self.parameterization_class = parameterization_class
        self.refinery = refinery

        self.observations_pair1_selected = observations_pair1_selected
        self.observations_original_index_pair1_selected = \
            observations_original_index_pair1_selected

        try:
            self.run_plain()

            (result_observations_original_index,
             result_observations,
             result_matches) = self.result_for_cxi_merge()

            assert result_observations_original_index.size() == \
                result_observations.size()
            assert result_matches.pairs().size() == \
                result_observations_original_index.size()

        except (AssertionError, ValueError, RuntimeError) as e:
            # The experiment is rejected; tally it under its reason.
            reason = repr(e) or "Unknown error"
            experiments_rejected_by_reason[reason] = \
                experiments_rejected_by_reason.get(reason, 0) + 1
            continue

        new_experiments.append(experiment)

        new_exp_reflections = flex.reflection_table()
        new_exp_reflections['miller_index_asymmetric'] = flex.miller_index(
            result_observations.indices())
        new_exp_reflections['intensity.sum.value'] = flex.double(
            result_observations.data())
        new_exp_reflections['intensity.sum.variance'] = flex.double(
            flex.pow(result_observations.sigmas(), 2))
        new_exp_reflections['exp_id'] = flex.std_string(
            len(new_exp_reflections), experiment.identifier)
        new_reflections.extend(new_exp_reflections)

    # report rejected experiments, reflections
    experiments_rejected_by_postrefinement = \
        len(experiments) - len(new_experiments)
    reflections_rejected_by_postrefinement = \
        reflections.size() - new_reflections.size()

    self.logger.log("Experiments rejected by post-refinement: %d" %
                    experiments_rejected_by_postrefinement)
    self.logger.log("Reflections rejected by post-refinement: %d" %
                    reflections_rejected_by_postrefinement)

    all_reasons = []
    for reason, count in experiments_rejected_by_reason.items():
        self.logger.log("Experiments rejected due to %s: %d" %
                        (reason, count))
        all_reasons.append(reason)

    comm = self.mpi_helper.comm
    MPI = self.mpi_helper.MPI

    # Collect all rejection reasons from all ranks. Use allreduce to let each
    # rank have all reasons.
    all_reasons = comm.allreduce(all_reasons, MPI.SUM)

    # Every rank must issue the reductions below in the same order.  A bare
    # set of strings may iterate differently in each process when hash
    # randomization is active, so the reasons are sorted.
    all_reasons = sorted(set(all_reasons))

    # Now that each rank has all reasons from all ranks, we can treat the
    # reasons in a uniform way.
    total_experiments_rejected_by_reason = {}
    for reason in all_reasons:
        rejected_experiment_count = \
            experiments_rejected_by_reason.get(reason, 0)
        total_experiments_rejected_by_reason[reason] = comm.reduce(
            rejected_experiment_count, MPI.SUM, 0)

    total_accepted_experiment_count = comm.reduce(
        len(new_experiments), MPI.SUM, 0)

    # how many reflections have we rejected due to post-refinement?
    rejected_reflections = len(reflections) - len(new_reflections)
    total_rejected_reflections = self.mpi_helper.sum(rejected_reflections)

    if self.mpi_helper.rank == 0:
        for reason, count in total_experiments_rejected_by_reason.items():
            self.logger.main_log(
                "Total experiments rejected due to %s: %d" % (reason, count))
        self.logger.main_log("Total experiments accepted: %d" %
                             total_accepted_experiment_count)
        self.logger.main_log(
            "Total reflections rejected due to post-refinement: %d" %
            total_rejected_reflections)

    self.logger.log_step_time("POSTREFINEMENT", True)

    return new_experiments, new_reflections
def format_miller_arrays(self, iparams):
    '''
    Read in mtz file and format to miller_arrays_out object with
    index[0] --> FP, SIGFP
    index[1] --> PHIB
    index[2] --> FOM
    index[3] --> HLA, HLB, HLC, HLD
    index[4] --> optional PHIC

    Columns are located by comparing the first label of every miller array
    in ``iparams.data`` with entries 0, 2, 3 and 4 of the comma-separated
    ``iparams.column_names``.  If no PHIB column is present but the
    Hendrickson-Lattman coefficients are, PHIB and FOM are derived from
    them.  Only reflections found in all four arrays are kept.  When
    ``iparams.hklrefin`` is given, the column named ``iparams.column_phic``
    is read from it as PHIC; reflections without a benchmark phase keep 0.

    Returns a tuple ``(miller_arrays_out, fp_sort_index_stacks, txt_out)``:
    the formatted miller arrays, a list of index lists ("stacks") taken in
    order of descending (optionally Wilson-B-weighted) FP, each holding at
    least ``iparams.percent_f_squared`` percent of the summed FP**2, and a
    text table describing the stacks.

    Calls ``sys.exit()`` when a required column cannot be found.
    '''
    #readin reflection file
    reflection_file = reflection_file_reader.any_reflection_file(iparams.data)
    col_name = iparams.column_names.split(',')
    miller_arrays = reflection_file.as_miller_arrays()
    crystal_symmetry = crystal.symmetry(
        unit_cell=miller_arrays[0].unit_cell(),
        space_group=miller_arrays[0].space_group())

    #grab all required columns
    flag_fp_found = False
    flag_phib_found = False
    flag_fom_found = False
    flag_hl_found = False
    ind_miller_array_fp = 0
    ind_miller_array_phib = 0
    ind_miller_array_fom = 0
    ind_miller_array_hl = 0
    for i, miller_array in enumerate(miller_arrays):
        #only look at first index string
        first_label = miller_array.info().label_string().split(',')[0]
        if first_label == col_name[0]:
            #grab FP, SIGFP
            flex_fp_all = miller_array.data()
            flex_sigmas_all = miller_array.sigmas()
            flag_fp_found = True
            ind_miller_array_fp = i
        elif first_label == col_name[2]:
            #grab PHIB
            flex_phib_all = miller_array.data()
            flag_phib_found = True
            ind_miller_array_phib = i
        elif first_label == col_name[3]:
            #grab FOM
            flex_fom_all = miller_array.data()
            flag_fom_found = True
            ind_miller_array_fom = i
        elif first_label == col_name[4]:
            #grab HLA,HLB,HLC,HLD
            flex_hl_all = miller_array.data()
            flag_hl_found = True
            ind_miller_array_hl = i

    if flag_hl_found and not flag_phib_found:
        #calculate PHIB and FOM from HL
        miller_array_phi_fom = \
            miller_arrays[ind_miller_array_hl].phase_integrals()
        flex_phib_all = miller_array_phi_fom.phases(deg=True).data()
        flex_fom_all = miller_array_phi_fom.amplitudes().data()
        flag_phib_found = True
        flag_fom_found = True
        # The derived values are ordered like the HL array, so reflections
        # must be looked up in that array's indices.  Leaving these at their
        # previous values would match against an unrelated array.
        ind_miller_array_phib = ind_miller_array_hl
        ind_miller_array_fom = ind_miller_array_hl

    if not (flag_fp_found and flag_phib_found and flag_fom_found
            and flag_hl_found):
        print("couldn't find all required columns")
        sys.exit()

    miller_indices_sel = miller_arrays[ind_miller_array_fp].indices()
    print('No. reflections for read-in miller arrays - indices:%6.0f fp:%6.0f phib:%6.0f fom:%6.0f HL:%6.0f)' % (
        len(miller_indices_sel), len(flex_fp_all), len(flex_phib_all),
        len(flex_fom_all), len(flex_hl_all)))

    def first_match_position(single_index, array_position):
        # Position, inside miller_arrays[array_position], reported by the
        # first pair that matches the one reflection in single_index, or
        # None when nothing matches.
        matches = miller.match_multi_indices(
            miller_indices_unique=single_index,
            miller_indices=miller_arrays[array_position].indices())
        pairs = matches.pairs()
        if len(pairs) > 0:
            return pairs[0][1]
        return None

    miller_indices = flex.miller_index()
    flex_fp = flex.double()
    flex_sigmas = flex.double()
    flex_phib = flex.double()
    flex_fom = flex.double()
    flex_hl = flex.hendrickson_lattman()
    #format all miller arrays to the same length
    # NOTE(review): every reflection triggers four full-array matches, so
    # this loop is quadratic in the number of reflections.
    for miller_index in miller_indices_sel:
        single_index = flex.miller_index([miller_index])
        i_fp = first_match_position(single_index, ind_miller_array_fp)
        i_phib = first_match_position(single_index, ind_miller_array_phib)
        i_fom = first_match_position(single_index, ind_miller_array_fom)
        i_hl = first_match_position(single_index, ind_miller_array_hl)

        # keep the reflection only if it is present in all four arrays
        if i_fp is None or i_phib is None or i_fom is None or i_hl is None:
            continue

        miller_indices.append(miller_index)
        flex_fp.append(flex_fp_all[i_fp])
        flex_sigmas.append(flex_sigmas_all[i_fp])
        flex_phib.append(flex_phib_all[i_phib])
        flex_fom.append(flex_fom_all[i_fom])
        flex_hl.append(flex_hl_all[i_hl])

    print('No. reflections after format - indices:%6.0f fp:%6.0f phib:%6.0f fom:%6.0f HL:%6.0f)' % (
        len(miller_indices), len(flex_fp), len(flex_phib), len(flex_fom),
        len(flex_hl)))

    flex_hla = flex.double()
    flex_hlb = flex.double()
    flex_hlc = flex.double()
    flex_hld = flex.double()
    for data_hl_row in flex_hl:
        flex_hla.append(data_hl_row[0])
        flex_hlb.append(data_hl_row[1])
        flex_hlc.append(data_hl_row[2])
        flex_hld.append(data_hl_row[3])

    # Read benchmark MTZ (PHICalc) for MPE calculation
    flex_phic = flex.double([0] * len(flex_fp))
    if iparams.hklrefin is not None:
        reflection_file = reflection_file_reader.any_reflection_file(
            iparams.hklrefin)
        miller_arrays_bench = reflection_file.as_miller_arrays()
        flex_phic_raw = None
        for miller_array_bench in miller_arrays_bench:
            #only look at first index string
            first_label = \
                miller_array_bench.info().label_string().split(',')[0]
            if first_label == iparams.column_phic:
                #grab PHIC
                if miller_array_bench.is_complex_array():
                    flex_phic_raw = \
                        miller_array_bench.phases(deg=True).data()
                else:
                    flex_phic_raw = miller_array_bench.data()
                miller_indices_phic = miller_array_bench.indices()

        # The guard has to test the raw column: flex_phic itself is never
        # None, so testing it let a missing PHIC column crash below.
        if flex_phic_raw is not None:
            matches = miller.match_multi_indices(
                miller_indices_unique=miller_indices,
                miller_indices=miller_indices_phic)
            # Write each phase at the position of the reflection it belongs
            # to (pair[0]), so the array keeps the length and order of
            # miller_indices even when some reflections have no benchmark
            # phase.
            for pair in matches.pairs():
                flex_phic[pair[0]] = flex_phic_raw[pair[1]]

    #format miller_arrays_out
    miller_set = miller.set(
        crystal_symmetry=crystal_symmetry,
        indices=miller_indices,
        anomalous_flag=False)
    miller_array_out = miller_set.array(
        data=flex_fp,
        sigmas=flex_sigmas).set_observation_type_xray_amplitude()

    #check if Wilson B-factor is applied
    flex_fp_for_sort = flex_fp[:]
    if iparams.flag_apply_b_factor:
        try:
            #get wilson_plot
            from mmtbx.scaling import xtriage
            from libtbx.utils import null_out
            xtriage_args = [
                iparams.data,
                "",
                "",
                "log=tst_xtriage_1.log"
            ]
            result = xtriage.run(args=xtriage_args, out=null_out())
            ws = result.wilson_scaling

            print('Wilson K=%6.2f B=%6.2f' % (ws.iso_p_scale, ws.iso_b_wilson))
            sin_theta_over_lambda_sq = miller_array_out.two_theta(
                wavelength=iparams.wavelength) \
                .sin_theta_over_lambda_sq().data()
            wilson_expect = flex.exp(
                -2 * ws.iso_b_wilson * sin_theta_over_lambda_sq)

            flex_fp_for_sort = wilson_expect * flex_fp
        except Exception:
            # Deliberate best effort: fall back to the unweighted FP.
            print('Error calculating Wilson scale factors. Continue without applying B-factor.')

    mtz_dataset = miller_array_out.as_mtz_dataset(column_root_label="FP")

    for data, lbl, typ in [(flex_phib, "PHIB", "P"),
                           (flex_fom, "FOMB", "W"),
                           (flex_hla, "HLA", "A"),
                           (flex_hlb, "HLB", "A"),
                           (flex_hlc, "HLC", "A"),
                           (flex_hld, "HLD", "A"),
                           (flex_phic, "PHIC", "P")]:
        mtz_dataset.add_miller_array(miller_array_out.array(data=data),
                                     column_root_label=lbl,
                                     column_types=typ)

    miller_arrays_out = mtz_dataset.mtz_object().as_miller_arrays()

    # getting sorted indices for the selected reflections in input mtz file
    # list_fp_sort_index: stores indices of sorted FP in descending order
    import operator
    fp_sort_index = [i for (i, j) in sorted(enumerate(flex_fp_for_sort),
                                            key=operator.itemgetter(1))]
    fp_sort_index.reverse()

    #calculate sum of fp^2 from percent_f_squared
    flex_fp_squared = flex_fp ** 2
    total_fp_squared = np.sum(flex_fp_squared)
    f_squared_per_stack = (iparams.percent_f_squared * total_fp_squared) / 100
    fp_sort_index_stacks = []
    sum_fp_now, i_start = (0, 0)
    for i in range(len(fp_sort_index)):
        i_sel = fp_sort_index[i_start:i + 1]
        sum_fp_now = np.sum([flex_fp_squared[ii_sel] for ii_sel in i_sel])
        if sum_fp_now >= f_squared_per_stack:
            fp_sort_index_stacks.append(i_sel)
            i_start = i + 1
            if len(fp_sort_index_stacks) == iparams.n_stacks:
                break

    txt_out = 'stack_no sum(f_squared) %total n_refl\n'
    for i, stack in enumerate(fp_sort_index_stacks):
        sum_fp = np.sum([flex_fp_squared[ii_sel] for ii_sel in stack])
        txt_out += '%6.0f %14.2f %8.2f %6.0f\n' % (
            i + 1, sum_fp, (sum_fp / total_fp_squared) * 100, len(stack))

    return miller_arrays_out, fp_sort_index_stacks, txt_out
def scale_frame_detail(self, result, file_name, db_mgr, out):
    """Correct, filter and accumulate the observations of one frame.

    The steps, in order, are: pick the wavelength; divide the observations
    by the polarization factor; recast them into the reference symmetry
    (asu-mapped and original-index copies); optionally apply the binwise
    I/sigma resolution filter; match against ``self.miller_set``; record the
    frame and its matched observations through ``db_mgr``; and accumulate
    intensities into a ``frame_data`` object.

    Parameters:
      result: mapping for one integrated frame.  Keys read here:
        "wavelength" (optional), "observations", "cos_two_polar_angle",
        "model_partialities" (optional), "xbeam", "ybeam", "distance",
        "current_orientation", "ML_half_mosaicity_deg",
        "ML_domain_size_ang" and "mapped_predictions".
      file_name: name of the frame's file; stored on the returned data and
        used in log output.
      db_mgr: object providing ``insert_frame`` and ``insert_observation``.
      out: log stream; must support ``getvalue()`` in addition to being
        usable with ``print >>``.

    Returns:
      ``None`` if no wavelength is available; a ``null_data`` object with
      ``low_signal=True`` if the significance filter accepts no bin or no
      reflection is accepted; otherwise the populated ``frame_data`` object.

    NOTE(review): block boundaries were reconstructed from a
    whitespace-collapsed copy of this method -- confirm the indentation of
    each branch against the original file.
    """
    # If the pickled integration file does not contain a wavelength,
    # fall back on the value given on the command line.  XXX The
    # wavelength parameter should probably be removed from master_phil
    # once all pickled integration files contain it.
    if ("wavelength" in result):
        wavelength = result["wavelength"]
    elif (self.params.wavelength is not None):
        wavelength = self.params.wavelength
    else:
        # XXX Give error, or raise exception?
        return None
    assert (wavelength > 0)

    observations = result["observations"][0]
    cos_two_polar_angle = result["cos_two_polar_angle"]

    assert observations.size() == cos_two_polar_angle.size()
    tt_vec = observations.two_theta(wavelength)
    #print "mean tt degrees",180.*flex.mean(tt_vec.data())/math.pi
    cos_tt_vec = flex.cos(tt_vec.data())
    sin_tt_vec = flex.sin(tt_vec.data())
    cos_sq_tt_vec = cos_tt_vec * cos_tt_vec
    sin_sq_tt_vec = sin_tt_vec * sin_tt_vec
    P_nought_vec = 0.5 * (1. + cos_sq_tt_vec)

    F_prime = -1.0  # Hard-coded value defines the incident polarization axis
    P_prime = 0.5 * F_prime * cos_two_polar_angle * sin_sq_tt_vec
    # XXX added as a diagnostic
    # The same factor is evaluated for the opposite axis (F' = +1) purely to
    # print how much the two corrections differ; it is not applied.
    prange = P_nought_vec - P_prime

    other_F_prime = 1.0
    otherP_prime = 0.5 * other_F_prime * cos_two_polar_angle * sin_sq_tt_vec
    otherprange = P_nought_vec - otherP_prime
    diff2 = flex.abs(prange - otherprange)
    print "mean diff is", flex.mean(diff2), "range", flex.min(diff2), flex.max(diff2)
    # XXX done
    observations = observations / (P_nought_vec - P_prime)
    # This corrects observations for polarization assuming 100% polarization on
    # one axis (thus the F_prime = -1.0 rather than the perpendicular axis, 1.0)
    # Polarization model as described by Kahn, Fourme, Gadet, Janin, Dumas & Andre
    # (1982) J. Appl. Cryst. 15, 330-337, equations 13 - 15.

    print "Step 3. Correct for polarization."
    # Remember the cell before the symmetry is replaced by the reference one.
    indexed_cell = observations.unit_cell()

    observations_original_index = observations.deep_copy()
    if result.get("model_partialities", None) is not None and result["model_partialities"][0] is not None:
        # some recordkeeping useful for simulations
        # NOTE(review): partialities_original_index is not read again in
        # this method.
        partialities_original_index = observations.customized_copy(
            crystal_symmetry=self.miller_set.crystal_symmetry(),
            data=result["model_partialities"][0]["data"],
            sigmas=flex.double(result["model_partialities"][0]["data"].size()),  #dummy value for sigmas
            indices=result["model_partialities"][0]["indices"],
        ).resolution_filter(d_min=self.params.d_min)

    assert len(observations_original_index.indices()) == len(observations.indices())

    # Now manipulate the data to conform to unit cell, asu, and space group
    # of reference.  The resolution will be cut later.
    # Only works if there is NOT an indexing ambiguity!
    observations = observations.customized_copy(
        anomalous_flag=not self.params.merge_anomalous,
        crystal_symmetry=self.miller_set.crystal_symmetry()).map_to_asu()

    observations_original_index = observations_original_index.customized_copy(
        anomalous_flag=not self.params.merge_anomalous,
        crystal_symmetry=self.miller_set.crystal_symmetry())
    print "Step 4. Filter on global resolution and map to asu"
    print >> out, "Data in reference setting:"
    #observations.show_summary(f=out, prefix=" ")
    show_observations(observations, out=out)

    #if self.params.significance_filter.apply is True:
    #  raise Exception("significance filter not implemented in samosa")
    if self.params.significance_filter.apply is True:  #------------------------------------
        # Apply an I/sigma filter ... accept resolution bins only if they
        # have significant signal; tends to screen out higher resolution observations
        # if the integration model doesn't quite fit
        N_obs_pre_filter = observations.size()
        N_bins_small_set = N_obs_pre_filter // self.params.significance_filter.min_ct
        N_bins_large_set = N_obs_pre_filter // self.params.significance_filter.max_ct

        # Ensure there is at least one bin.
        N_bins = max([
            min([self.params.significance_filter.n_bins, N_bins_small_set]),
            N_bins_large_set,
            1
        ])
        print "Total obs %d Choose n bins = %d" % (N_obs_pre_filter, N_bins)
        bin_results = show_observations(observations, out=out, n_bins=N_bins)
        #show_observations(observations, out=sys.stdout, n_bins=N_bins)
        acceptable_resolution_bins = [
            bin.mean_I_sigI > self.params.significance_filter.sigma
            for bin in bin_results
        ]
        # Index i is kept only while every bin from 0 through i passed, so
        # the list describes the leading run of acceptable bins.
        acceptable_nested_bin_sequences = [
            i for i in xrange(len(acceptable_resolution_bins))
            if False not in acceptable_resolution_bins[:i + 1]
        ]
        if len(acceptable_nested_bin_sequences) == 0:
            return null_data(file_name=file_name, log_out=out.getvalue(), low_signal=True)
        else:
            N_acceptable_bins = max(acceptable_nested_bin_sequences) + 1
            # The third whitespace-separated field of the last accepted
            # bin's d_range is used as the new d_min.
            imposed_res_filter = float(bin_results[N_acceptable_bins - 1].d_range.split()[2])
            imposed_res_sel = observations.resolution_filter_selection(
                d_min=imposed_res_filter)
            observations = observations.select(imposed_res_sel)
            observations_original_index = observations_original_index.select(
                imposed_res_sel)
            print "New resolution filter at %7.2f" % imposed_res_filter, file_name
        print "N acceptable bins", N_acceptable_bins
        print "Old n_obs: %d, new n_obs: %d" % (N_obs_pre_filter, observations.size())
        print "Step 5. Frame by frame resolution filter"
        # Finished applying the binwise I/sigma filter---------------------------------------

    if self.params.raw_data.sdfac_auto is True:
        raise Exception("sdfac auto not implemented in samosa.")

    print "Step 6. Match to reference intensities, filter by correlation, filter out negative intensities."
    assert len(observations_original_index.indices()) \
        == len(observations.indices())

    data = frame_data(self.n_refl, file_name)
    data.set_indexed_cell(indexed_cell)
    data.d_min = observations.d_min()

    # Ensure that match_multi_indices() will return identical results
    # when a frame's observations are matched against the
    # pre-generated Miller set, self.miller_set, and the reference
    # data set, self.i_model.  The implication is that the same match
    # can be used to map Miller indices to array indices for intensity
    # accumulation, and for determination of the correlation
    # coefficient in the presence of a scaling reference.
    if self.i_model is not None:
        assert len(self.i_model.indices()) == len(self.miller_set.indices()) \
            and (self.i_model.indices() == self.miller_set.indices()).count(False) == 0

    # In each pair, pair[0] indexes self.miller_set and pair[1] indexes the
    # frame's observations (see how both are used below).
    matches = miller.match_multi_indices(
        miller_indices_unique=self.miller_set.indices(),
        miller_indices=observations.indices())

    use_weights = False  # New facility for getting variance-weighted correlation
    if self.params.scaling.algorithm in ['mark1', 'levmar']:
        # Because no correlation is computed, the correlation
        # coefficient is fixed at zero.  Setting slope = 1 means
        # intensities are added without applying a scale factor.
        # NOTE(review): sum_x and sum_y are only assigned in this branch but
        # are printed unconditionally further down -- confirm no other
        # algorithm value can reach this method.
        sum_x = 0
        sum_y = 0
        for pair in matches.pairs():
            data.n_obs += 1
            if not self.params.include_negatives and observations.data()[pair[1]] <= 0:
                data.n_rejected += 1
            else:
                sum_y += observations.data()[pair[1]]
        N = data.n_obs - data.n_rejected

    # Early return if there are no positive reflections on the frame.
    if data.n_obs <= data.n_rejected:
        return null_data(file_name=file_name, log_out=out.getvalue(), low_signal=True)

    # Update the count for each matched reflection.  This counts
    # reflections with non-positive intensities, too.
    data.completeness += matches.number_of_matches(0).as_int()
    data.wavelength = wavelength

    if not self.params.scaling.enable:
        # Do not scale anything
        print "Scale factor to an isomorphous reference PDB will NOT be applied."
        slope = 1.0
        offset = 0.0

    observations_original_index_indices = observations_original_index.indices()
    # NOTE(review): neither `unpack` nor `MINI` is assigned anywhere in this
    # method; unless both exist at module level this branch raises NameError
    # when db_mgr is None.
    if db_mgr is None:
        return unpack(MINI.x)  # special exit for two-color indexing

    # Frame-level record handed to the database manager.
    kwargs = {
        'wavelength': wavelength,
        'beam_x': result['xbeam'],
        'beam_y': result['ybeam'],
        'distance': result['distance'],
        'unique_file_name': data.file_name
    }

    ORI = result["current_orientation"][0]
    Astar = matrix.sqr(ORI.reciprocal_matrix())

    # The nine elements of the reciprocal-space orientation matrix are
    # stored one per column.
    kwargs['res_ori_1'] = Astar[0]
    kwargs['res_ori_2'] = Astar[1]
    kwargs['res_ori_3'] = Astar[2]
    kwargs['res_ori_4'] = Astar[3]
    kwargs['res_ori_5'] = Astar[4]
    kwargs['res_ori_6'] = Astar[5]
    kwargs['res_ori_7'] = Astar[6]
    kwargs['res_ori_8'] = Astar[7]
    kwargs['res_ori_9'] = Astar[8]
    assert self.params.scaling.report_ML is True
    kwargs['half_mosaicity_deg'] = result["ML_half_mosaicity_deg"][0]
    kwargs['domain_size_ang'] = result["ML_domain_size_ang"][0]

    frame_id_0_base = db_mgr.insert_frame(**kwargs)

    xypred = result["mapped_predictions"][0]
    indices = flex.size_t([pair[1] for pair in matches.pairs()])

    # NOTE(review): the selections below are built with flex.intersection
    # from the matched observation indices; whether they keep the order of
    # matches.pairs() (as 'hkl_id_0_base' assumes) cannot be seen from here.
    sel_observations = flex.intersection(size=observations.data().size(),
                                         iselections=[indices])
    set_original_hkl = observations_original_index_indices.select(
        flex.intersection(size=observations_original_index_indices.size(),
                          iselections=[indices]))
    set_xypred = xypred.select(
        flex.intersection(size=xypred.size(), iselections=[indices]))

    # Observation-level records: one entry per matched reflection.
    kwargs = {
        'hkl_id_0_base': [pair[0] for pair in matches.pairs()],
        'i': observations.data().select(sel_observations),
        'sigi': observations.sigmas().select(sel_observations),
        'detector_x': [xy[0] for xy in set_xypred],
        'detector_y': [xy[1] for xy in set_xypred],
        'frame_id_0_base': [frame_id_0_base] * len(matches.pairs()),
        'overload_flag': [0] * len(matches.pairs()),
        'original_h': [hkl[0] for hkl in set_original_hkl],
        'original_k': [hkl[1] for hkl in set_original_hkl],
        'original_l': [hkl[2] for hkl in set_original_hkl]
    }

    db_mgr.insert_observation(**kwargs)

    print >> out, "Lattice: %d reflections" % (data.n_obs - data.n_rejected)
    # sum_x is never incremented above, so "average calc" always prints 0.
    print >> out, "average obs", sum_y / (data.n_obs - data.n_rejected), \
        "average calc", sum_x / (data.n_obs - data.n_rejected)
    print >> out, "Rejected %d reflections with negative intensities" % \
        data.n_rejected

    data.accept = True
    for pair in matches.pairs():
        if not self.params.include_negatives and (observations.data()[pair[1]] <= 0):
            continue
        Intensity = observations.data()[pair[1]]
        # Super-rare exception. If saved sigmas instead of I/sigmas in the ISIGI dict, this wouldn't be needed.
        if Intensity == 0:
            continue

        # Add the reflection as a two-tuple of intensity and I/sig(I)
        # to the dictionary of observations.
        # (The tuple stored below also carries a third element, 1.0.)
        index = self.miller_set.indices()[pair[0]]
        isigi = (Intensity,
                 observations.data()[pair[1]] / observations.sigmas()[pair[1]],
                 1.0)
        if index in data.ISIGI:
            data.ISIGI[index].append(isigi)
        else:
            data.ISIGI[index] = [isigi]

        # Accumulate inverse-variance weighted sums per unique reflection.
        sigma = observations.sigmas()[pair[1]]
        variance = sigma * sigma
        data.summed_N[pair[0]] += 1
        data.summed_wt_I[pair[0]] += Intensity / variance
        data.summed_weight[pair[0]] += 1 / variance

    data.set_log_out(out.getvalue())
    return data
def run(args):
    """Convert, filter, re-batch and scale a set of XDS integration files.

    The steps, in order:

    1. Parse ``args`` against ``master_phil_scope``; the arguments that are
       not phil parameters are treated as input file names.
    2. Derive a wedge number (from a ``sweep_*`` path component, otherwise a
       running counter) and a lattice id (from a ``lattice_*`` path
       component) for every file, and assign each file a sequential wedge id.
    3. Optionally (``params.overlaps.find_overlaps``) run
       ``dials.import_xds`` and ``find_overlaps`` per wedge number to collect
       the Miller indices of overlapping reflections.
    4. Run ``pointless`` on every file to produce ``integrate_hkl_NNN.mtz``
       and, if requested, delete the overlapping reflections from it.
    5. Optionally (``params.resolve_indexing_ambiguity``) run
       ``brehm_diederichs`` on the MTZ files.
    6. Offset the batch numbers of each MTZ file by ``1000 * wedge_number``
       and write ``rebatch-N.mtz``.
    7. Combine everything with ``pointless`` and scale with ``aimless``,
       logging to ``pointless_all.log`` and ``aimless.log``.

    :param args: command-line arguments (phil parameters and file names).
    :raises AssertionError: if ``space_group`` or ``unit_cell`` is not set.
    """
    cmd_line = command_line.argument_interpreter(
        master_params=master_phil_scope)
    working_phil, args = cmd_line.process_and_fetch(
        args=args, custom_processor="collect_remaining")
    working_phil.show()
    params = working_phil.extract()

    files = args

    from cctbx import crystal
    from iotbx.reflection_file_reader import any_reflection_file

    file_name_dict = {}

    wedge_id = -1
    wedge_number = -1

    wedge_number_to_wedge_id = {}

    assert params.space_group is not None
    assert params.unit_cell is not None
    space_group = params.space_group.group()
    unit_cell = params.unit_cell
    # Only referenced by the disabled symmetry checks below; the constructor
    # call is kept because it may validate the cell against the space group
    # (NOTE(review): confirm before removing).
    crystal_symmetry = crystal.symmetry(
        unit_cell=unit_cell, space_group=space_group)

    for file_name in files:
        file_name = os.path.abspath(file_name)
        print(file_name)
        path_components = file_name.split(os.path.sep)

        # The wedge number is the last three characters of the first
        # 'sweep_*' path component (extension stripped), if there is one.
        wedge_number_ = None
        for s in path_components:
            if s.startswith('sweep_'):
                wedge_number_ = int(os.path.splitext(s)[0][-3:])
                print("wedge_number: %s" % wedge_number_)
                break
        if wedge_number_ is not None:
            wedge_number = wedge_number_
        else:
            wedge_number += 1

        lattice_id = 1
        for s in path_components:
            if s.startswith('lattice_'):
                lattice_id = int(os.path.splitext(s)[0].split('_')[-1])
                print("lattice_id: %s" % lattice_id)
                break

        wedge_id += 1
        print("wedge_id: %i, wedge_number: %i, lattice_id: %i" % (
            wedge_id, wedge_number, lattice_id))
        wedge_number_to_wedge_id.setdefault(wedge_number, [])
        wedge_number_to_wedge_id[wedge_number].append(wedge_id)

        #if not intensities.crystal_symmetry().is_similar_symmetry(
        #    crystal_symmetry, relative_length_tolerance=0.1):
        #    continue

        file_name_dict[wedge_id] = file_name

    if params.overlaps.find_overlaps:
        # figure out the overlapping reflections and save the miller indices
        # for later on
        reject_hkl = {}

        def run_find_overlaps(args):
            """Return {wedge_id: overlapping miller indices} for one wedge."""
            wedge_n, wedge_ids = args
            result_dict = {}

            print("Wedge %s" % (wedge_n,))
            # A wedge with a single lattice cannot have overlaps.
            if len(wedge_ids) > 1:
                for wedge_id in wedge_ids:
                    import_args = [
                        "dials.import_xds",
                        os.path.split(file_name_dict[wedge_id])[0],
                        "--output='experiments_%i.json'" % wedge_id]
                    cmd = " ".join(import_args)
                    print(cmd)
                    result = easy_run.fully_buffered(cmd).raise_if_errors()
                    result.show_stdout()
                    result.show_stderr()

                    import_args = [
                        "dials.import_xds",
                        file_name_dict[wedge_id],
                        "experiments_%i.json" % wedge_id,
                        "--input=reflections",
                        "--output='integrate_hkl_%i.pickle'" % wedge_id]
                    cmd = " ".join(import_args)
                    print(cmd)
                    result = easy_run.fully_buffered(cmd).raise_if_errors()
                    result.show_stdout()
                    result.show_stderr()

                from dials.command_line import find_overlaps
                overlap_args = [
                    'experiments_%i.json' % wedge_id
                    for wedge_id in wedge_ids]
                overlap_args.extend(
                    'integrate_hkl_%i.pickle' % wedge_id
                    for wedge_id in wedge_ids)
                #overlap_args.append("nproc=%s" %params.nproc)

                overlap_args.append(
                    "max_overlap_fraction=%f"
                    % params.overlaps.max_overlap_fraction)
                overlap_args.append(
                    "max_overlap_pixels=%f"
                    % params.overlaps.max_overlap_pixels)
                overlap_args.append("n_sigma=%f" % params.overlaps.n_sigma)
                overlap_args.append("save_overlaps=False")
                overlaps = find_overlaps.run(overlap_args)
                miller_indices = overlaps.overlapping_reflections[
                    'miller_index']
                # One selection of overlapping indices per lattice, in the
                # same order as wedge_ids.
                overlapping = [
                    miller_indices.select(
                        overlaps.overlapping_reflections['id'] == i_lattice)
                    for i_lattice in range(len(wedge_ids))]
                for wedge_id, wedge_overlaps in zip(wedge_ids, overlapping):
                    result_dict[wedge_id] = wedge_overlaps
            return result_dict

        from libtbx import easy_mp
        results = easy_mp.parallel_map(
            func=run_find_overlaps,
            iterable=list(wedge_number_to_wedge_id.items()),
            processes=params.nproc,
            preserve_order=True,
            asynchronous=False,
            preserve_exception_message=True,
        )
        for result in results:
            reject_hkl.update(result)

    for wedge_ids in wedge_number_to_wedge_id.values():
        for wedge in wedge_ids:
            cmd = (
                "pointless -copy xdsin %s hklout integrate_hkl_%03.f.mtz"
                " << EOF\n"
                "SPACEGROUP %s\n"
                "EOF\n"
            ) % (file_name_dict[wedge], wedge,
                 space_group.type().lookup_symbol())
            # Text mode: the command and the captured output are strings.
            # The context manager closes the log even if pointless fails.
            with open('pointless_%03.f.log' % wedge, 'w') as log:
                log.write(cmd + "\n")
                result = easy_run.fully_buffered(command=cmd)
                result.show_stdout(out=log)
                result.show_stderr(out=log)

            if params.overlaps.find_overlaps:
                from cctbx import miller
                from iotbx import mtz
                m = mtz.object(file_name="integrate_hkl_%03.f.mtz" % wedge)
                orig_indices = m.extract_original_index_miller_indices()
                overlaps = reject_hkl.get(wedge)
                if overlaps is not None and len(overlaps) > 0:
                    matches = miller.match_multi_indices(
                        overlaps, orig_indices)
                    print("before: %i reflections" % m.n_reflections())
                    # Delete from the highest index down so that the
                    # remaining indices stay valid.
                    for i_ref in sorted(
                            matches.pair_selection(1).iselection(),
                            reverse=True):
                        m.delete_reflection(i_ref)
                    print("after: %i reflections" % m.n_reflections())
                    m.add_history(
                        "Removed %i overlapping reflections" % len(overlaps))
                    m.write("integrate_hkl_%03.f.mtz" % wedge)

    g = glob.glob("integrate_hkl_*.mtz")

    if params.resolve_indexing_ambiguity:
        from cctbx.command_line import brehm_diederichs
        args = g
        args.append("asymmetric=1")
        args.append("save_plot=True")
        args.append("show_plot=False")
        brehm_diederichs.run(args)
        g = glob.glob("integrate_hkl_*_reindexed.mtz")

    for file_name in g:
        wedge_number = int(os.path.splitext(
            os.path.basename(file_name))[0].replace('_reindexed', '')[-3:])
        #print wedge_number, wedge_number
        result = any_reflection_file(file_name)
        mtz_object = result.file_content()
        #if not mtz_object.crystals()[0].crystal_symmetry().is_similar_symmetry(
        #    crystal_symmetry, relative_length_tolerance=0.1):
        #    continue
        # Make batch numbers unique across wedges.
        batch_offset = 1000 * wedge_number
        for batch in mtz_object.batches():
            batch.set_num(batch.num() + batch_offset)
        batches = mtz_object.get_column('BATCH')
        batches.set_values(batches.extract_values() + batch_offset)
        mtz_object.write("rebatch-%i.mtz" % (wedge_number))

    g = glob.glob("rebatch-*.mtz")

    cmd = (
        "pointless -copy hklin %s hklout pointless.mtz << EOF\n"
        "ALLOW OUTOFSEQUENCEFILES\n"
        "TOLERANCE 4\n"
        "SPACEGROUP %s\n"
        "EOF\n"
    ) % (" ".join(g), space_group.type().lookup_symbol())

    with open('pointless_all.log', 'w') as log:
        log.write(cmd + "\n")
        result = easy_run.fully_buffered(command=cmd)
        result.show_stdout(out=log)
        result.show_stderr(out=log)

    cmd = (
        "aimless pointless.mtz << EOF\n"
        "OUTPUT UNMERGED TOGETHER\n"
        "%s\n"
        "EOF\n"
    ) % ("\n".join(params.aimless.command))

    with open('aimless.log', 'w') as log:
        log.write(cmd + "\n")
        result = easy_run.fully_buffered(command=cmd)
        result.show_stdout(out=log)
        result.show_stderr(out=log)
def scale_frame_detail(self, timestamp, cursor, do_inserts=True, result=None):  #, file_name, db_mgr, out):
    """Build (and optionally execute) the SQL inserts for one frame.

    The frame's observations are mapped to the asymmetric unit, matched
    against ``self.miller_set`` with ``match_multi_indices``, and turned into
    one row for the ``<tag>_frames`` table plus one row per matched
    observation for the ``<tag>_observations`` table.

    :param timestamp: stored as both ``unique_file_name`` and ``eventstamp``
        of the frame row.
    :param cursor: database cursor; passed to ``get_trial_id`` /
        ``get_run_id`` and used for the inserts when ``do_inserts`` is true.
    :param do_inserts: when false, nothing is written and the frame id is
        ``None``.
    :param result: mapping with the integration results; read keys include
        ``"wavelength"``, ``"observations"``, ``"xbeam"``, ``"ybeam"``,
        ``"distance"``, ``"sa_parameters"`` and ``"mapped_predictions"``.
        Defaults to ``self.params``.
    :returns: dict with keys ``'frame'`` and ``'observations'``, each a list
        ``[sql, parameters, kwargs]``; for ``'observations'`` the first two
        entries are ``None`` when ``do_inserts`` is false.
    :raises AssertionError: if the wavelength is not positive or the
        observation counts disagree after mapping to the asymmetric unit.
    """
    if result is None:
        result = self.params

    # If the pickled integration file does not contain a wavelength,
    # fall back on the value given on the command line.  XXX The
    # wavelength parameter should probably be removed from master_phil
    # once all pickled integration files contain it.
    wavelength = result["wavelength"]
    assert (wavelength > 0)

    # Do not apply polarization correction here, as this requires knowledge of
    # pixel size at minimum, and full detector geometry in general.  The optimal
    # redesign would be to apply the polarization correction just after the
    # integration step in the integration code.
    print("Step 3. Correct for polarization.")
    observations = result["observations"][0]

    observations_original_index = observations.deep_copy()

    assert len(observations_original_index.indices()) == len(
        observations.indices())

    # Now manipulate the data to conform to unit cell, asu, and space group
    # of reference.  The resolution will be cut later.
    # Only works if there is NOT an indexing ambiguity!
    #observations = observations.customized_copy(
    #  anomalous_flag=not self.params.merge_anomalous,
    #  crystal_symmetry=self.miller_set.crystal_symmetry()
    #  ).map_to_asu()

    #observations_original_index = observations_original_index.customized_copy(
    #  anomalous_flag=not self.params.merge_anomalous,
    #  crystal_symmetry=self.miller_set.crystal_symmetry()
    #  )
    observations = observations.customized_copy(
        anomalous_flag=False).map_to_asu()
    print("Step 4. Filter on global resolution and map to asu")

    #observations.show_summary(f=out, prefix=" ")
    from rstbx.dials_core.integration_core import show_observations
    show_observations(observations)

    print(
        "Step 6.  Match to reference intensities, filter by correlation, filter out negative intensities."
    )
    assert len(observations_original_index.indices()) \
        == len(observations.indices())

    # Ensure that match_multi_indices() will return identical results
    # when a frame's observations are matched against the
    # pre-generated Miller set, self.miller_set, and the reference
    # data set, self.i_model.  The implication is that the same match
    # can be used to map Miller indices to array indices for intensity
    # accumulation, and for determination of the correlation
    # coefficient in the presence of a scaling reference.
    self.miller_set.show_summary(prefix="mset ")

    matches = match_multi_indices(
        miller_indices_unique=self.miller_set.indices(),
        miller_indices=observations.indices())
    # Each pair is (index into self.miller_set, index into observations).
    # Fetch the pairs once instead of once per column below.
    pairs = matches.pairs()
    n_pairs = len(pairs)

    slope = 1.0
    offset = 0.0

    print(result.get("sa_parameters")[0])

    observations_original_index_indices = \
        observations_original_index.indices()
    print(list(result.keys()))

    # Insertion order of this dict determines the column order of the
    # frame insert, so keys are added in a fixed sequence.
    kwargs = {
        'wavelength': wavelength,
        'beam_x': result['xbeam'],
        'beam_y': result['ybeam'],
        'distance': result['distance'],
        'slope': slope,
        'offset': offset,
        'unique_file_name': timestamp,
        'eventstamp': timestamp,
        'sifoil': 0.0
    }

    trial_id = self.get_trial_id(cursor)
    run_id = self.get_run_id(cursor)
    kwargs["trials_id"] = trial_id
    kwargs["rungroups_id"] = self.rungroup_id
    kwargs["runs_run_id"] = run_id
    kwargs["isoforms_isoform_id"] = self.isoform_id

    # Nine elements of the transposed orthogonalization matrix of the
    # observations' unit cell, stored as res_ori_1 .. res_ori_9.
    res_ori_direct = matrix.sqr(
        observations.unit_cell().orthogonalization_matrix()
    ).transpose().elems
    for i_elem in range(9):
        kwargs['res_ori_%d' % (i_elem + 1)] = res_ori_direct[i_elem]

    # Optional quantities default to NaN when absent from the result.
    kwargs['mosaic_block_rotation'] = result.get(
        "ML_half_mosaicity_deg", [float("NaN")])[0]
    kwargs['mosaic_block_size'] = result.get(
        "ML_domain_size_ang", [float("NaN")])[0]
    kwargs['ewald_proximal_volume'] = result.get(
        "ewald_proximal_volume", [float("NaN")])[0]

    sql, parameters = self._insert(
        table='`%s_frames`' % self.db_experiment_tag, **kwargs)
    print(sql)
    print(parameters)
    results = {'frame': [sql, parameters, kwargs]}
    if do_inserts:
        cursor.execute(sql, parameters[0])
        frame_id = cursor.lastrowid
    else:
        frame_id = None

    xypred = result["mapped_predictions"][0]
    indices = flex.size_t([pair[1] for pair in pairs])

    sel_observations = flex.intersection(
        size=observations.data().size(), iselections=[indices])
    set_original_hkl = observations_original_index_indices.select(
        flex.intersection(
            size=observations_original_index_indices.size(),
            iselections=[indices]))
    set_xypred = xypred.select(
        flex.intersection(size=xypred.size(), iselections=[indices]))

    # Debugging printout (disabled):
    #print len(observations.data())
    #print len(indices)
    #print len(sel_observations)
    #for x in range(len(observations.data())):
    #  print x,observations.indices().select(sel_observations)[x],
    #  print set_original_hkl[x],
    #  index_into_hkl_id = matches.pairs()[x][0]
    #  print index_into_hkl_id,
    #  print self.miller_set.indices()[index_into_hkl_id],
    #  cursor.execute('SELECT H,K,L FROM %s_hkls WHERE hkl_id = %d'%(
    #      self.db_experiment_tag, self.miller_set_id[index_into_hkl_id]))
    #  print cursor.fetchall()[0]

    print("Adding %d observations for this frame" % (len(sel_observations)))
    kwargs = {
        'hkls_id': self.miller_set_id.select(
            flex.size_t([pair[0] for pair in pairs])),
        'i': observations.data().select(sel_observations),
        'sigi': observations.sigmas().select(sel_observations),
        'detector_x_px': [xy[0] for xy in set_xypred],
        'detector_y_px': [xy[1] for xy in set_xypred],
        'frames_id': [frame_id] * n_pairs,
        'overload_flag': [0] * n_pairs,
        'original_h': [hkl[0] for hkl in set_original_hkl],
        'original_k': [hkl[1] for hkl in set_original_hkl],
        'original_l': [hkl[2] for hkl in set_original_hkl],
        'frames_rungroups_id': [self.rungroup_id] * n_pairs,
        'frames_trials_id': [trial_id] * n_pairs,
        'panel': [0] * n_pairs
    }
    if do_inserts:
        # For MySQLdb executemany() is six times slower than a single big
        # execute() unless the "values" keyword is given in lowercase
        # (http://sourceforge.net/p/mysql-python/bugs/305).
        #
        # See also merging_database_sqlite3._insert()
        query = ("INSERT INTO `%s_observations` (" % self.db_experiment_tag) \
            + ", ".join(kwargs) + ") values (" \
            + ", ".join(["%s"] * len(kwargs)) + ")"
        try:
            # Transpose the per-column sequences into per-row tuples.
            parameters = list(zip(*list(kwargs.values())))
        except TypeError:
            parameters = [list(kwargs.values())]
        cursor.executemany(query, parameters)
        #print "done execute many"
        #print cursor._last_executed
        results['observations'] = [query, parameters, kwargs]
    else:
        # since frame_id isn't valid in the query here, don't include a sql
        # statement or parameters array in the results
        results['observations'] = [None, None, kwargs]
    return results
def postrefine_by_frame(self, frame_no, pres_in, iparams, miller_array_ref,
                        avg_mode):
    """Post-refine one frame against a reference set of intensities.

    The frame's pickle is loaded, its observations are matched to
    ``miller_array_ref`` with ``miller.match_multi_indices``, the matched
    subset is passed to ``leastsqr_handler().optimize``, and partialities
    are then recomputed for all observations with the refined parameters.
    Reflections whose partiality is not above
    ``iparams.merge.partiality_min`` are dropped from the result.

    :param frame_no: frame number stored in the returned result object.
    :param pres_in: previous result for this frame; supplies
        ``pickle_filename`` and ``crystal_orientation``. ``None`` means
        there is nothing to refine.
    :param iparams: refinement parameters.
    :param miller_array_ref: reference miller array (indices and data).
    :param avg_mode: forwarded to ``self.organize_input``.
    :returns: ``(pres, text)`` where ``pres`` is a ``postref_results``
        object, or ``(None, message)`` when the input is empty, cannot be
        organized, or the optimization raises.
    """
    #Prepare data
    if pres_in is None:
        return None, 'Found empty pickle file'

    pickle_filename = pres_in.pickle_filename
    # NOTE(review): unpickling can execute arbitrary code; make sure these
    # pickle files only ever come from a trusted source.
    with open(pickle_filename, "rb") as pickle_file:
        observations_pickle = pickle.load(pickle_file)
    wavelength = observations_pickle["wavelength"]
    crystal_init_orientation = observations_pickle["current_orientation"][0]
    img_filename_only = pickle_filename.split('/')[-1]
    txt_exception = ' {0:40} ==> '.format(img_filename_only)

    inputs, txt_organize_input = self.organize_input(
        observations_pickle,
        iparams,
        avg_mode,
        pickle_filename=pickle_filename)
    if inputs is None:
        txt_exception += txt_organize_input + '\n'
        return None, txt_exception
    observations_original, alpha_angle, spot_pred_x_mm, spot_pred_y_mm, \
        detector_distance_mm, identified_isoform, mapped_predictions, \
        xbeam, ybeam = inputs

    #Select data for post-refinement (only select indices that are common
    #with the reference set)
    observations_non_polar, index_basis_name = \
        self.get_observations_non_polar(
            observations_original, pickle_filename, iparams)
    matches = miller.match_multi_indices(
        miller_indices_unique=miller_array_ref.indices(),
        miller_indices=observations_non_polar.indices())
    # Each pair is (index into the reference, index into the observations).
    # Fetch the pairs once and reuse them for every matched column.
    pairs = matches.pairs()

    I_ref_match = flex.double(
        [miller_array_ref.data()[pair[0]] for pair in pairs])
    miller_indices_ref_match = flex.miller_index(
        (miller_array_ref.indices()[pair[0]] for pair in pairs))
    I_obs_match = flex.double(
        [observations_non_polar.data()[pair[1]] for pair in pairs])
    sigI_obs_match = flex.double(
        [observations_non_polar.sigmas()[pair[1]] for pair in pairs])
    miller_indices_original_obs_match = flex.miller_index(
        (observations_original.indices()[pair[1]] for pair in pairs))
    miller_indices_non_polar_obs_match = flex.miller_index(
        (observations_non_polar.indices()[pair[1]] for pair in pairs))
    alpha_angle_set = flex.double(
        [alpha_angle[pair[1]] for pair in pairs])
    spot_pred_x_mm_set = flex.double(
        [spot_pred_x_mm[pair[1]] for pair in pairs])
    spot_pred_y_mm_set = flex.double(
        [spot_pred_y_mm[pair[1]] for pair in pairs])

    references_sel = miller_array_ref.customized_copy(
        data=I_ref_match, indices=miller_indices_ref_match)
    observations_original_sel = observations_original.customized_copy(
        data=I_obs_match,
        sigmas=sigI_obs_match,
        indices=miller_indices_original_obs_match)
    observations_non_polar_sel = observations_non_polar.customized_copy(
        data=I_obs_match,
        sigmas=sigI_obs_match,
        indices=miller_indices_non_polar_obs_match)

    #Do least-squares refinement
    lsqrh = leastsqr_handler()
    try:
        refined_params, stats, n_refl_postrefined = lsqrh.optimize(
            I_ref_match, observations_original_sel, wavelength,
            crystal_init_orientation, alpha_angle_set, spot_pred_x_mm_set,
            spot_pred_y_mm_set, iparams, pres_in,
            observations_non_polar_sel, detector_distance_mm)
    except Exception:
        # Any failure in the optimizer is reported as a per-frame message
        # rather than aborting the whole run.
        txt_exception += 'optimization failed.\n'
        return None, txt_exception

    #calculate partiality for output (with target_anomalous check)
    G_fin, B_fin, rotx_fin, roty_fin, ry_fin, rz_fin, r0_fin, re_fin, \
        voigt_nu_fin, a_fin, b_fin, c_fin, alpha_fin, beta_fin, \
        gamma_fin = refined_params
    # Re-derive the full (unmatched) observation set: partialities are
    # computed for every observation, not only the matched ones.
    inputs, txt_organize_input = self.organize_input(
        observations_pickle,
        iparams,
        avg_mode,
        pickle_filename=pickle_filename)
    observations_original, alpha_angle, spot_pred_x_mm, spot_pred_y_mm, \
        detector_distance_mm, identified_isoform, mapped_predictions, \
        xbeam, ybeam = inputs
    observations_non_polar, index_basis_name = \
        self.get_observations_non_polar(
            observations_original, pickle_filename, iparams)

    from cctbx.uctbx import unit_cell
    uc_fin = unit_cell(
        (a_fin, b_fin, c_fin, alpha_fin, beta_fin, gamma_fin))
    crystal_init_orientation = pres_in.crystal_orientation
    two_theta = observations_original.two_theta(
        wavelength=wavelength).data()
    ph = partiality_handler()
    partiality_fin, _, rs_fin, rh_fin = ph.calc_partiality_anisotropy_set(
        uc_fin, rotx_fin, roty_fin, observations_original.indices(),
        ry_fin, rz_fin, r0_fin, re_fin, voigt_nu_fin, two_theta,
        alpha_angle, wavelength, crystal_init_orientation, spot_pred_x_mm,
        spot_pred_y_mm, detector_distance_mm, iparams.partiality_model,
        iparams.flag_beam_divergence)

    #calculate the new crystal orientation
    O = sqr(uc_fin.orthogonalization_matrix()).transpose()
    R = sqr(crystal_init_orientation.crystal_rotation_matrix()).transpose()
    from cctbx.crystal_orientation import crystal_orientation, basis_type
    CO = crystal_orientation(O * R, basis_type.direct)
    crystal_fin_orientation = CO.rotate_thru(
        (1, 0, 0), rotx_fin).rotate_thru((0, 1, 0), roty_fin)

    #remove reflections with partiality below threshold
    i_sel = partiality_fin > iparams.merge.partiality_min
    partiality_fin_sel = partiality_fin.select(i_sel)
    rs_fin_sel = rs_fin.select(i_sel)
    rh_fin_sel = rh_fin.select(i_sel)
    observations_non_polar_sel = observations_non_polar.select(i_sel)
    observations_original_sel = observations_original.select(i_sel)
    mapped_predictions = mapped_predictions.select(i_sel)

    pres = postref_results()
    pres.set_params(observations=observations_non_polar_sel,
                    observations_original=observations_original_sel,
                    refined_params=refined_params,
                    stats=stats,
                    partiality=partiality_fin_sel,
                    rs_set=rs_fin_sel,
                    rh_set=rh_fin_sel,
                    frame_no=frame_no,
                    pickle_filename=pickle_filename,
                    wavelength=wavelength,
                    crystal_orientation=crystal_init_orientation,
                    detector_distance_mm=detector_distance_mm,
                    identified_isoform=identified_isoform,
                    mapped_predictions=mapped_predictions,
                    xbeam=xbeam,
                    ybeam=ybeam)

    # Percentage changes are for the report line only. If one cannot be
    # computed (e.g. a zero initial value), it and all later ones keep
    # their 0 default; this is deliberately best-effort.
    r_change, r_xy_change, cc_change, cc_iso_change = (0, 0, 0, 0)
    try:
        r_change = ((pres.R_final - pres.R_init) / pres.R_init) * 100
        r_xy_change = (
            (pres.R_xy_final - pres.R_xy_init) / pres.R_xy_init) * 100
        cc_change = ((pres.CC_final - pres.CC_init) / pres.CC_init) * 100
        cc_iso_change = ((pres.CC_iso_final - pres.CC_iso_init) /
                         pres.CC_iso_init) * 100
    except Exception:
        pass

    txt_postref = ' {0:40} ==> RES:{1:5.2f} NREFL:{2:5d} R:{3:8.2f}% RXY:{4:8.2f}% CC:{5:6.2f}% CCISO:{6:6.2f}% G:{7:10.3e} B:{8:7.1f} CELL:{9:6.2f} {10:6.2f} {11:6.2f} {12:6.2f} {13:6.2f} {14:6.2f}'.format(
        img_filename_only + ' (' + index_basis_name + ')',
        observations_original_sel.d_min(),
        len(observations_original_sel.data()), r_change, r_xy_change,
        cc_change, cc_iso_change, pres.G, pres.B, a_fin, b_fin, c_fin,
        alpha_fin, beta_fin, gamma_fin)
    # Single-argument call form prints identically on Python 2 and 3.
    print(txt_postref)
    txt_postref += '\n'
    return pres, txt_postref