def run_scaling_cycle(self):
    """Do a round of scaling for scaling and filtering.

    The scaling algorithm is run with the ``full_matrix`` option switched
    off; the option is then put back to the value read from ``self.params``.
    Afterwards ``self.remove_bad_data()`` is called, the ``scaled`` flag of
    every reflection table is reset so that only reflections not flagged
    ``bad_for_scaling`` carry it, ``self.scaled_miller_array`` is rebuilt and
    the merging statistics are recalculated.

    A ``DialsMergingStatisticsError`` raised while calculating the merging
    statistics is logged at info level and not propagated.
    """
    # Turn off the full matrix round, all else is the same.
    initial_full_matrix = self.params.scaling_options.full_matrix
    self.scaler.params.scaling_options.full_matrix = False
    try:
        self.scaler = scaling_algorithm(self.scaler)
    finally:
        # Restore the setting even when scaling raises, so the temporary
        # override can never outlive this call.
        self.scaler.params.scaling_options.full_matrix = initial_full_matrix
    self.remove_bad_data()
    for table in self.reflections:
        bad = table.get_flags(table.flags.bad_for_scaling, all=False)
        # Clear the scaled flag on every reflection, then set it again on
        # those that are not bad for scaling.
        table.unset_flags(flex.bool(table.size(), True), table.flags.scaled)
        table.set_flags(~bad, table.flags.scaled)
    self.scaled_miller_array = scaled_data_as_miller_array(
        self.reflections,
        self.experiments,
        anomalous_flag=False,
        best_unit_cell=self.params.reflection_selection.best_unit_cell,
    )
    try:
        self.calculate_merging_stats()
    except DialsMergingStatisticsError as e:
        logger.info(e)
    logger.info("Performed cycle of scaling.")
def export_mtz_only(reflections, experiments, params):
    """Export data in mtz format.

    ``reflections`` must hold exactly one (combined) reflection table.
    An unmerged mtz is written when ``params.output.unmerged_mtz`` is set.
    When ``params.output.merged_mtz`` is set, several filenames are handed
    to ``_export_multi_merged_mtz``; a single filename gets one merged mtz
    built from the normal and anomalous merged arrays.
    """
    assert len(reflections) == 1, "Need a combined reflection table from scaling."
    combined_table = reflections[0]

    if params.output.unmerged_mtz:
        _export_unmerged_mtz(params, experiments, combined_table)

    merged_filenames = params.output.merged_mtz
    if not merged_filenames:
        return

    if len(merged_filenames) > 1:
        _export_multi_merged_mtz(params, experiments, combined_table)
        return

    # Single output file: merge the data with and without Friedel pairs
    # kept separate and write both into one file.
    internal_variance = params.output.use_internal_variance
    scaled_array = scaled_data_as_miller_array(reflections, experiments)
    merged = scaled_array.merge_equivalents(
        use_internal_variance=internal_variance
    ).array()
    anomalous_array = scaled_array.as_anomalous_array()
    merged_anom = anomalous_array.merge_equivalents(
        use_internal_variance=internal_variance
    ).array()
    mtz_file = make_merged_mtz_file(merged, merged_anom)
    destination = merged_filenames[0]
    logger.info("\nSaving output to a merged mtz file to %s.\n", destination)
    mtz_file.write(destination)
def run(self):
    """Run the scaling script.

    Inside the HTML and summary context managers this scales the data,
    calls ``self.remove_bad_data()``, resets the ``scaled`` flag on every
    reflection table, rebuilds ``self.scaled_miller_array`` and calculates
    the merging statistics, then logs the elapsed time.

    Raises:
        ValueError: if no experiments are left after removing bad data.
    """
    with ScalingHTMLContextManager(self):
        with ScalingSummaryContextManager(self):
            began = time.time()
            self.scale()
            self.remove_bad_data()
            if not self.experiments:
                raise ValueError("All data sets have been rejected as bad.")

            for refl_table in self.reflections:
                flags = refl_table.flags
                rejected = refl_table.get_flags(flags.bad_for_scaling, all=False)
                # Wipe the scaled flag everywhere before re-applying it to
                # the reflections that are not bad for scaling.
                everything = flex.bool(refl_table.size(), True)
                refl_table.unset_flags(everything, flags.scaled)
                refl_table.set_flags(~rejected, flags.scaled)

            unit_cell = self.params.reflection_selection.best_unit_cell
            self.scaled_miller_array = scaled_data_as_miller_array(
                self.reflections,
                self.experiments,
                anomalous_flag=False,
                best_unit_cell=unit_cell,
            )

            try:
                self.calculate_merging_stats()
            except DialsMergingStatisticsError as err:
                # Failed statistics are reported but do not stop the run.
                logger.info(err)

            # All done!
            elapsed = time.time() - began
            logger.info("\nTotal time taken: %.4fs ", elapsed)
            logger.info("%s%s%s", "\n", "=" * 80, "\n")
def test_scaled_data_as_miller_array(dials_data):
    """Check array size and wavelength handling of scaled_data_as_miller_array.

    The combined reflection table is split into one table per experiment id
    (0 and 1). The resulting array must hold 5503 reflections and carry the
    beam wavelength of the first experiment by default, or the explicitly
    supplied wavelength when one is passed.
    """
    data_dir = dials_data("l_cysteine_4_sweeps_scaled", pathlib=True)
    combined = flex.reflection_table.from_file(data_dir / "scaled_20_25.refl")
    experiments = ExperimentList.from_file(
        data_dir / "scaled_20_25.expt", check_format=False
    )

    per_experiment = []
    for expt_id in range(2):
        per_experiment.append(combined.select(combined["id"] == expt_id))
    print([refl.size() for refl in per_experiment])

    expected_size = 5503

    # Default: wavelength comes from the first experiment's beam.
    default_array = scaled_data_as_miller_array(per_experiment, experiments)
    assert default_array.size() == expected_size
    beam_wavelength = experiments[0].beam.get_wavelength()
    assert default_array.info().wavelength == beam_wavelength

    # Explicit wavelength overrides the beam value.
    override_array = scaled_data_as_miller_array(
        per_experiment, experiments, wavelength=1
    )
    assert override_array.size() == expected_size
    assert override_array.info().wavelength == 1
def reflection_tables_to_batch_dependent_properties(
    reflection_tables, experiments, scaled_array=None
):
    """Extract batch dependent properties from a reflection table list.

    Args:
        reflection_tables: reflection tables; batches are assigned to them
            from offsets calculated from ``experiments``.
        experiments: the experiments matching the reflection tables.
        scaled_array: optional pre-computed scaled array. When ``None`` it
            is built with ``scaled_data_as_miller_array``.

    Returns:
        The tuple returned by ``batch_dependent_properties`` with one extra
        trailing element: a list of ``{"id": ..., "range": ...}`` dicts, one
        per batch range.
    """
    offsets = calculate_batch_offsets(experiments)
    reflection_tables = assign_batches_to_reflections(reflection_tables, offsets)
    # filter bad refls and negative scales
    batches = flex.int()
    scales = flex.double()
    for r in reflection_tables:
        sel = ~r.get_flags(r.flags.bad_for_scaling, all=False)
        sel &= r["inverse_scale_factor"] > 0
        batches.extend(r["batch"].select(sel))
        scales.extend(r["inverse_scale_factor"].select(sel))
    # Compare against None explicitly: the fallback is meant for "argument
    # not supplied", not for whatever truth value the array object may have.
    if scaled_array is None:
        scaled_array = scaled_data_as_miller_array(reflection_tables, experiments)
    ms = scaled_array.customized_copy()
    batch_array = miller.array(ms, data=batches)
    batch_ranges = get_batch_ranges(experiments, offsets)
    batch_data = [{"id": i, "range": r} for i, r in enumerate(batch_ranges)]
    properties = batch_dependent_properties(
        batch_array, scaled_array, miller.array(ms, data=scales)
    )
    return properties + (batch_data,)
def export(self):
    """Save the experiments json and scaled pickle file.

    The experiments are saved first. All per-dataset reflection tables are
    then joined into one table, reflections with a non-positive inverse
    scale factor are flagged as scaling outliers, and the joint table is
    saved. Depending on ``self.params.output`` an unmerged mtz and one or
    more merged mtz files are written as well.
    """
    logger.info("%s%s%s", "\n", "=" * 80, "\n")
    save_experiments(self.experiments, self.params.output.experiments)
    # Now create a joint reflection table. Delete all other data before
    # joining reflection tables - just need experiments for mtz export
    # and a reflection table.
    self.delete_datastructures()
    joint_table = flex.reflection_table()
    # Index-based loop on purpose: each list slot is overwritten as soon as
    # its table has been copied, so no other name keeps the table alive.
    for i in range(len(self.reflections)):
        joint_table.extend(self.reflections[i])
        # Drop the reference to the per-dataset table that was just copied.
        self.reflections[i] = 0
        # NOTE(review): assumed to run once per table - confirm nesting.
        gc.collect()
    # Find reflections with a non-positive inverse scale factor and count
    # how many of them were not already excluded from scaling.
    sel = joint_table["inverse_scale_factor"] <= 0.0
    good_sel = ~joint_table.get_flags(joint_table.flags.bad_for_scaling, all=False)
    n_neg = (good_sel & sel).count(True)
    if n_neg > 0:
        logger.warning(
            """ Warning: %s non-excluded reflections were assigned negative scale factors during scaling. These will be set as outliers in the reflection table. It may be best to rerun scaling from this point for an improved model.""",
            n_neg,
        )
        # NOTE(review): assumed to belong to the n_neg > 0 branch - confirm.
        joint_table.set_flags(sel, joint_table.flags.outlier_in_scaling)
    save_reflections(joint_table, self.params.output.reflections)
    if self.params.output.unmerged_mtz:
        _export_unmerged_mtz(self.params, self.experiments, joint_table)
    if self.params.output.merged_mtz:
        if len(self.params.output.merged_mtz) > 1:
            # Several filenames: delegate to the multi-file export helper.
            _export_multi_merged_mtz(self.params, self.experiments, joint_table)
        else:
            # The non-anomalous data come from the stored
            # self.scaled_miller_array; only the anomalous array is rebuilt
            # here from the joint table.
            anomalous_scaled = scaled_data_as_miller_array(
                [joint_table], self.experiments, anomalous_flag=True)
            merged = self.scaled_miller_array.merge_equivalents(
                use_internal_variance=self.params.output.
                use_internal_variance).array()
            merged_anom = anomalous_scaled.merge_equivalents(
                use_internal_variance=self.params.output.
                use_internal_variance).array()
            mtz_file = make_merged_mtz_file(merged, merged_anom)
            logger.info(
                "\nSaving output to a merged mtz file to %s.\n",
                self.params.output.merged_mtz[0],
            )
            mtz_file.write(self.params.output.merged_mtz[0])
def test_script(test_data):
    """Build a mock scaling script populated from ``test_data``.

    ``test_data`` unpacks to (reflections, experiments, params). The mock
    carries those three, a scaled miller array built from them, and ``None``
    for the merging statistics and filtering results.
    """
    script = mock.Mock()
    refls, expts, params = test_data
    script.configure_mock(
        reflections=refls,
        experiments=expts,
        params=params,
        scaled_miller_array=scaled_data_as_miller_array(refls, expts),
        merging_statistics_result=None,
        anom_merging_statistics_result=None,
        filtering_results=None,
    )
    return script
def run_scaling_cycle(self):
    """Do a round of scaling for scaling and filtering.

    The scaling algorithm is run with the ``full_matrix`` option switched
    off; the option is then put back to the value read from ``self.params``.
    Afterwards ``self.remove_unwanted_datasets()`` is called,
    ``self.scaled_miller_array`` is rebuilt and the merging statistics are
    recalculated.

    A ``DialsMergingStatisticsError`` raised while calculating the merging
    statistics is logged at info level and not propagated.
    """
    # Turn off the full matrix round, all else is the same.
    initial_full_matrix = self.params.scaling_options.full_matrix
    self.scaler.params.scaling_options.full_matrix = False
    try:
        self.scaler = scaling_algorithm(self.scaler)
    finally:
        # Restore the setting even when scaling raises, so the temporary
        # override can never outlive this call.
        self.scaler.params.scaling_options.full_matrix = initial_full_matrix
    self.remove_unwanted_datasets()
    self.scaled_miller_array = scaled_data_as_miller_array(
        self.reflections, self.experiments, anomalous_flag=False
    )
    try:
        self.calculate_merging_stats()
    except DialsMergingStatisticsError as e:
        logger.info(e)
    logger.info("Performed cycle of scaling.")
def stats_only(reflections, experiments, params):
    """Calculate and print merging stats.

    The unit cell from ``params.reflection_selection.best_unit_cell`` is
    used when set; otherwise one is determined from ``experiments``. A
    ``DialsMergingStatisticsError`` is logged at info level instead of
    being raised.
    """
    unit_cell = (
        params.reflection_selection.best_unit_cell
        or determine_best_unit_cell(experiments)
    )
    array = scaled_data_as_miller_array(
        reflections, experiments, best_unit_cell=unit_cell
    )
    output = params.output
    try:
        result, _ = merging_stats_from_scaled_array(
            array, output.merging.nbins, output.use_internal_variance
        )
        logger.info(make_merging_statistics_summary(result))
    except DialsMergingStatisticsError as err:
        logger.info(err)
def run(self):
    """Run the scaling script.

    Scales the data, calls ``self.remove_unwanted_datasets()``, rebuilds
    ``self.scaled_miller_array`` and calculates the merging statistics,
    then logs the total elapsed time. A ``DialsMergingStatisticsError``
    from the statistics step is logged at info level and not propagated.
    """
    start_time = time.time()
    self.scale()
    self.remove_unwanted_datasets()
    self.scaled_miller_array = scaled_data_as_miller_array(
        self.reflections, self.experiments, anomalous_flag=False
    )
    try:
        self.calculate_merging_stats()
    except DialsMergingStatisticsError as e:
        logger.info(e)
    # All done!
    # Pass the value as a logging argument so the message is only formatted
    # when the record is actually emitted; the resulting text is unchanged.
    logger.info("\nTotal time taken: %.4fs ", time.time() - start_time)
    logger.info("%s%s%s", "\n", "=" * 80, "\n")
def _export_multi_merged_mtz(params, experiments, reflection_table):
    """Write one merged mtz file per wavelength group.

    ``match_wavelengths`` maps each wavelength to the indices of the
    experiments belonging to it. There must be exactly one filename in
    ``params.output.merged_mtz`` per wavelength; filenames and wavelengths
    are paired in order.
    """
    from dxtbx.model import ExperimentList

    wavelengths = match_wavelengths(experiments)
    filenames = params.output.merged_mtz
    assert len(filenames) == len(wavelengths.keys())

    for filename, wavelength in zip(filenames, wavelengths.keys()):
        members = wavelengths[wavelength]

        # Collect the experiments (and their identifiers) of this group.
        subset = ExperimentList()
        identifiers = []
        for index, expt in enumerate(experiments):
            if index not in members:
                continue
            subset.append(expt)
            identifiers.append(expt.identifier)

        group_refls = reflection_table.select_on_experiment_identifiers(identifiers)
        scaled_array = scaled_data_as_miller_array([group_refls], subset)

        merged = scaled_array.merge_equivalents(
            use_internal_variance=params.output.use_internal_variance
        ).array()
        anomalous_array = scaled_array.as_anomalous_array()
        merged_anom = anomalous_array.merge_equivalents(
            use_internal_variance=params.output.use_internal_variance
        ).array()

        mtz_file = make_merged_mtz_file(merged, merged_anom)
        logger.info("\nSaving output to a merged mtz file to %s.\n", filename)
        mtz_file.write(filename)
def run(self):
    """Run the scaling script.

    Scales the data, calls ``self.remove_bad_data()``, rebuilds
    ``self.scaled_miller_array`` and calculates the merging statistics,
    then logs the total elapsed time. A ``DialsMergingStatisticsError``
    from the statistics step is logged at info level and not propagated.

    Raises:
        ValueError: if no experiments are left after removing bad data.
    """
    start_time = time.time()
    self.scale()
    self.remove_bad_data()
    if not self.experiments:
        raise ValueError("All data sets have been rejected as bad.")
    self.scaled_miller_array = scaled_data_as_miller_array(
        self.reflections,
        self.experiments,
        anomalous_flag=False,
        best_unit_cell=self.params.reflection_selection.best_unit_cell,
    )
    try:
        self.calculate_merging_stats()
    except DialsMergingStatisticsError as e:
        logger.info(e)
    # All done!
    # Pass the value as a logging argument so the message is only formatted
    # when the record is actually emitted; the resulting text is unchanged.
    logger.info("\nTotal time taken: %.4fs ", time.time() - start_time)
    logger.info("%s%s%s", "\n", "=" * 80, "\n")
def test_table_1_summary(dials_data):
    """Check that table_1_summary returns the expected column headings.

    Three cases are covered: overall statistics only, overall plus
    suggested statistics with anomalous data, and overall plus suggested
    statistics without anomalous data.
    """
    data_dir = dials_data("l_cysteine_4_sweeps_scaled")
    expts = load.experiment_list(
        data_dir.join("scaled_20_25.expt"), check_format=False
    )
    refls = flex.reflection_table.from_file(data_dir.join("scaled_20_25.refl"))

    # Get a miller array of real data and calculate the merging statistics.
    scaled = scaled_data_as_miller_array([refls], expts)
    stats, anom_stats = merging_stats_from_scaled_array(scaled)

    overall_labels = ("Overall", "Low", "High")
    suggested_labels = ("Overall", "Suggested", "Low", "High")

    # Overall statistics only.
    summary = table_1_summary(stats, anom_stats)
    assert summary
    assert all(label in summary for label in overall_labels)
    assert "Suggested" not in summary

    # Overall and suggested statistics, with anomalous.
    summary = table_1_summary(stats, anom_stats, stats, anom_stats)
    assert summary
    assert all(label in summary for label in suggested_labels)

    # No anomalous, but suggested as well as overall.
    summary = table_1_summary(stats, selected_statistics=stats)
    assert summary
    assert all(label in summary for label in suggested_labels)
def merge(
    experiments,
    reflections,
    d_min=None,
    d_max=None,
    combine_partials=True,
    partiality_threshold=0.4,
    best_unit_cell=None,
    anomalous=True,
    use_internal_variance=False,
    assess_space_group=False,
    n_bins=20,
):
    """
    Merge reflection table data and summarise the merging statistics.

    The input table is filtered, converted to a scaled miller array and
    merged (normally, and also anomalously when ``anomalous`` is true).
    Optionally the systematic absences check is run on the merged data.

    Returns a tuple ``(merged, merged_anom, stats_summary)``.
    ``merged_anom`` is ``None`` when ``anomalous`` is false, and
    ``stats_summary`` is ``None`` when the merging statistics could not be
    calculated.
    """
    logger.info("\nMerging scaled reflection data\n")

    # Filter bad reflections first, using the dials.util.filter methods.
    filtered = filter_reflection_table(
        reflections,
        intensity_choice=["scale"],
        d_min=d_min,
        d_max=d_max,
        combine_partials=combine_partials,
        partiality_threshold=partiality_threshold,
    )
    # The filter has applied the scale factor, so reset it to 1.0; this is
    # fine because the scale factor is not written to the merged mtz.
    filtered["inverse_scale_factor"] = flex.double(filtered.size(), 1.0)

    scaled_array = scaled_data_as_miller_array(
        [filtered], experiments, best_unit_cell
    )

    def _merged(array):
        # merge_equivalents does not raise an error if the data are unique.
        return array.merge_equivalents(
            use_internal_variance=use_internal_variance
        ).array()

    merged = _merged(scaled_array)
    merged_anom = _merged(scaled_array.as_anomalous_array()) if anomalous else None

    # Assess the space group on the merged (non-anomalous) data.
    if assess_space_group:
        absences_table = flex.reflection_table()
        absences_table["intensity"] = merged.data()
        absences_table["variance"] = flex.pow2(merged.sigmas())
        absences_table["miller_index"] = merged.indices()
        logger.info("Running systematic absences check")
        run_systematic_absences_checks(experiments, absences_table)

    try:
        stats, anom_stats = merging_stats_from_scaled_array(
            scaled_array, n_bins, use_internal_variance
        )
    except DialsMergingStatisticsError as err:
        logger.error(err, exc_info=True)
        return merged, merged_anom, None

    stats_summary = make_merging_statistics_summary(stats)
    stats_summary += table_1_summary(stats, anom_stats)
    return merged, merged_anom, stats_summary
def merge_and_truncate(params, experiments, reflections):
    """Filter data, assess space group, run french wilson and Wilson stats.

    Only the first table in ``reflections`` is used. Returns the tuple
    ``(merged, merged_anom, amplitudes, anom_amplitudes)``; ``merged_anom``
    and ``anom_amplitudes`` are ``None`` unless ``params.anomalous`` is
    set, and both amplitude arrays are ``None`` unless ``params.truncate``
    is set.
    """
    logger.info("\nMerging scaled reflection data\n")

    # Filter bad reflections first, using the dials.util.filter methods.
    filtered = filter_reflection_table(
        reflections[0],
        intensity_choice=["scale"],
        d_min=params.d_min,
        combine_partials=params.combine_partials,
        partiality_threshold=params.partiality_threshold,
    )
    # The filter has applied the scale factor, so reset it to 1.0; this is
    # fine because the scale factor is not written to the merged mtz.
    filtered["inverse_scale_factor"] = flex.double(filtered.size(), 1.0)

    scaled_array = scaled_data_as_miller_array([filtered], experiments)

    merged = scaled_array.merge_equivalents(
        use_internal_variance=params.merging.use_internal_variance
    ).array()
    if params.anomalous:
        merged_anom = (
            scaled_array.as_anomalous_array()
            .merge_equivalents(
                use_internal_variance=params.merging.use_internal_variance
            )
            .array()
        )
    else:
        merged_anom = None

    # Assess the space group on the merged (non-anomalous) data.
    if params.assess_space_group:
        absences_table = flex.reflection_table()
        absences_table["intensity"] = merged.data()
        absences_table["variance"] = merged.sigmas() ** 2
        absences_table["miller_index"] = merged.indices()
        logger.info("Running systematic absences check")
        run_sys_abs_checks(experiments, absences_table)

    # Truncation and Wilson statistics use the anomalous merged data when
    # anomalous output was requested, otherwise the normal merged data.
    intensities = merged_anom if params.anomalous else merged
    assert intensities.is_xray_intensity_array()

    amplitudes = None
    anom_amplitudes = None
    if params.truncate:
        logger.info("\nScaling input intensities via French-Wilson Method")
        fw_log = StringIO()
        if params.anomalous:
            anom_amplitudes = intensities.french_wilson(params=params, log=fw_log)
            n_removed = intensities.size() - anom_amplitudes.size()
            assert anom_amplitudes.is_xray_amplitude_array()
            # Derive the non-anomalous amplitudes from the anomalous ones.
            amplitudes = (
                anom_amplitudes.as_non_anomalous_array().merge_equivalents().array()
            )
        else:
            amplitudes = intensities.french_wilson(params=params, log=fw_log)
            n_removed = intensities.size() - amplitudes.size()
        logger.info("Total number of rejected intensities %s", n_removed)
        logger.debug(fw_log.getvalue())

    if params.reporting.wilson_stats and not intensities.space_group().is_centric():
        wilson_scaling = data_statistics.wilson_scaling(
            miller_array=intensities, n_residues=params.n_residues
        )  # XXX default n_residues?
        # Route the report through the logger via StringIO; using
        # info_handle gives far too much whitespace in the output.
        wilson_log = StringIO()
        wilson_scaling.show(out=wilson_log)
        logger.info(wilson_log.getvalue())

    # Apply wilson B to give absolute scale?

    # Show merging stats again.
    if params.reporting.merging_stats:
        stats, anom_stats = merging_stats_from_scaled_array(
            scaled_array,
            params.merging.n_bins,
            params.merging.use_internal_variance,
        )
        reported = anom_stats if params.merging.anomalous else stats
        logger.info(make_merging_statistics_summary(reported))

    return merged, merged_anom, amplitudes, anom_amplitudes
# Collect per-dataset resolution plots (CC1/2 and I/sigma) from a series of
# scaled datasets and dump them to a single JSON file.

# Hard-coded input directories; the trailing slash is required because the
# file names below are appended by plain string concatenation.
filenames = (
    "/Users/whi10850/Documents/test_data/multi_example/integrated_files/inc_test/1/",
    "/Users/whi10850/Documents/test_data/multi_example/integrated_files/inc_test/2/",
)
# One entry is appended to each list per input directory, in order.
cc_one_half_data = []
i_over_sig_data = []
for filepath in filenames:
    experiments = load.experiment_list(filepath + "scaled_experiments.json", check_format=False)
    reflections = flex.reflection_table.from_pickle(filepath + "scaled.pickle")
    # Report the ru_maxrss value of this process after loading the data.
    print("memory: %s" % int(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
    arr = scaled_data_as_miller_array([reflections], experiments)
    norm_stats, anom_stats = merging_stats_from_scaled_array(arr)
    plotter = ResolutionPlotsAndStats(norm_stats, anom_stats)
    resolution_plots = plotter.make_all_plots()
    cc_one_half_data.append(resolution_plots["cc_one_half"])
    i_over_sig_data.append(resolution_plots["i_over_sig_i"])
    # Drop the loaded data explicitly and collect before the next dataset
    # is read.
    del experiments
    del reflections
    gc.collect()
    print("added data for %s" % filepath)
# Note the output key is "i_over_sigma" while the plot key read above is
# "i_over_sig_i".
data = {"cc_one_half": cc_one_half_data, "i_over_sigma": i_over_sig_data}
with open("incremental_data.json", "w") as f:
    json.dump(data, f, indent=True)